blob: 8aa57ba729333bbd119604c06800d599a7bac2de [file] [log] [blame]
Nicolai Haehnle213e87f2016-03-21 20:28:33 +00001//===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief This pass adds instructions to enable whole quad mode for pixel
Connor Abbott92638ab2017-08-04 18:36:52 +000012/// shaders, and whole wavefront mode for all programs.
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000013///
14/// Whole quad mode is required for derivative computations, but it interferes
15/// with shader side effects (stores and atomics). This pass is run on the
16/// scheduled machine IR but before register coalescing, so that machine SSA is
17/// available for analysis. It ensures that WQM is enabled when necessary, but
18/// disabled around stores and atomics.
19///
20/// When necessary, this pass creates a function prolog
21///
22/// S_MOV_B64 LiveMask, EXEC
23/// S_WQM_B64 EXEC, EXEC
24///
25/// to enter WQM at the top of the function and surrounds blocks of Exact
26/// instructions by
27///
28/// S_AND_SAVEEXEC_B64 Tmp, LiveMask
29/// ...
30/// S_MOV_B64 EXEC, Tmp
31///
Connor Abbott92638ab2017-08-04 18:36:52 +000032/// We also compute when a sequence of instructions requires Whole Wavefront
33/// Mode (WWM) and insert instructions to save and restore it:
34///
35/// S_OR_SAVEEXEC_B64 Tmp, -1
36/// ...
37/// S_MOV_B64 EXEC, Tmp
38///
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000039/// In order to avoid excessive switching during sequences of Exact
40/// instructions, the pass first analyzes which instructions must be run in WQM
41/// (aka which instructions produce values that lead to derivative
42/// computations).
43///
44/// Basic blocks are always exited in WQM as long as some successor needs WQM.
45///
46/// There is room for improvement given better control flow analysis:
47///
48/// (1) at the top level (outside of control flow statements, and as long as
49/// kill hasn't been used), one SGPR can be saved by recovering WQM from
50/// the LiveMask (this is implemented for the entry block).
51///
52/// (2) when entire regions (e.g. if-else blocks or entire loops) only
53/// consist of exact and don't-care instructions, the switch only has to
54/// be done at the entry and exit points rather than potentially in each
55/// block of the region.
56///
57//===----------------------------------------------------------------------===//
58
59#include "AMDGPU.h"
60#include "AMDGPUSubtarget.h"
61#include "SIInstrInfo.h"
62#include "SIMachineFunctionInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000063#include "llvm/ADT/DenseMap.h"
Connor Abbottde068fe2017-08-04 18:36:50 +000064#include "llvm/ADT/PostOrderIterator.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000065#include "llvm/ADT/SmallVector.h"
66#include "llvm/ADT/StringRef.h"
67#include "llvm/CodeGen/LiveInterval.h"
68#include "llvm/CodeGen/LiveIntervalAnalysis.h"
69#include "llvm/CodeGen/MachineBasicBlock.h"
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000070#include "llvm/CodeGen/MachineFunction.h"
71#include "llvm/CodeGen/MachineFunctionPass.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000072#include "llvm/CodeGen/MachineInstr.h"
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000073#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000074#include "llvm/CodeGen/MachineOperand.h"
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000075#include "llvm/CodeGen/MachineRegisterInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000076#include "llvm/CodeGen/SlotIndexes.h"
77#include "llvm/IR/CallingConv.h"
78#include "llvm/IR/DebugLoc.h"
79#include "llvm/MC/MCRegisterInfo.h"
80#include "llvm/Pass.h"
81#include "llvm/Support/Debug.h"
82#include "llvm/Support/raw_ostream.h"
83#include "llvm/Target/TargetRegisterInfo.h"
84#include <cassert>
85#include <vector>
Nicolai Haehnle213e87f2016-03-21 20:28:33 +000086
87using namespace llvm;
88
89#define DEBUG_TYPE "si-wqm"
90
91namespace {
92
// Execution states tracked per instruction/block. These are bitmask values
// (stored in the `char` fields of InstrInfo/BlockInfo) so that several
// allowed/required states can be combined.
enum {
  StateWQM = 0x1,   // Whole Quad Mode: helper lanes of each quad enabled
  StateWWM = 0x2,   // Whole Wavefront Mode: all lanes enabled
  StateExact = 0x4, // Exact mode: only the originally live lanes enabled
};
98
/// Tiny wrapper around a State bitmask so it can be pretty-printed (e.g.
/// "WQM|Exact") through the operator<< defined below, mainly for debug dumps.
struct PrintState {
  int State;

  explicit PrintState(int S) : State(S) {}
};
105
106static raw_ostream &operator<<(raw_ostream &OS, const PrintState &PS) {
107 if (PS.State & StateWQM)
108 OS << "WQM";
Connor Abbott92638ab2017-08-04 18:36:52 +0000109 if (PS.State & StateWWM) {
Nicolai Haehnle3bba6a82016-09-03 12:26:38 +0000110 if (PS.State & StateWQM)
111 OS << '|';
Connor Abbott92638ab2017-08-04 18:36:52 +0000112 OS << "WWM";
113 }
114 if (PS.State & StateExact) {
115 if (PS.State & (StateWQM | StateWWM))
116 OS << '|';
Nicolai Haehnle3bba6a82016-09-03 12:26:38 +0000117 OS << "Exact";
118 }
119
120 return OS;
121}
122
// Per-instruction analysis state. All fields are bitmasks of the State* enum.
struct InstrInfo {
  // States this instruction itself requires (e.g. StateWQM for sampling).
  char Needs = 0;
  // States this instruction must never execute in (e.g. WQM for atomics).
  char Disabled = 0;
  // States required immediately after this instruction by later uses.
  char OutNeeds = 0;
};
128
// Per-basic-block analysis state. All fields are bitmasks of the State* enum.
struct BlockInfo {
  // States required somewhere inside the block.
  char Needs = 0;
  // States that must hold on entry to the block.
  char InNeeds = 0;
  // States that must hold on exit from the block (for successors).
  char OutNeeds = 0;
};
134
// A unit of work for the propagation worklist: either a whole basic block or
// a single instruction (exactly one of the two pointers is set).
struct WorkItem {
  MachineBasicBlock *MBB = nullptr;
  MachineInstr *MI = nullptr;

  WorkItem() = default;
  WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}
  WorkItem(MachineInstr *MI) : MI(MI) {}
};
143
/// Machine function pass that inserts the EXEC-mask manipulation needed to
/// enter/leave Whole Quad Mode (WQM) and Whole Wavefront Mode (WWM); see the
/// file header comment for the overall algorithm.
class SIWholeQuadMode : public MachineFunctionPass {
private:
  // Calling convention of the current function (used to special-case
  // pixel shaders, CallingConv::AMDGPU_PS).
  CallingConv::ID CallingConv;
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  // Analysis results, filled in by scanInstructions/propagate*.
  DenseMap<const MachineInstr *, InstrInfo> Instructions;
  DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
  // SI_PS_LIVE pseudos to be rewritten to the live mask register.
  SmallVector<MachineInstr *, 1> LiveMaskQueries;
  // WQM/WWM/undef-V_SET_INACTIVE pseudos to be lowered to plain COPYs.
  SmallVector<MachineInstr *, 4> LowerToCopyInstrs;

  void printInfo();

  void markInstruction(MachineInstr &MI, char Flag,
                       std::vector<WorkItem> &Worklist);
  void markInstructionUses(const MachineInstr &MI, char Flag,
                           std::vector<WorkItem> &Worklist);
  char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
  void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
  void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
  char analyzeFunction(MachineFunction &MF);

  bool requiresCorrectState(const MachineInstr &MI) const;

  MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator Before);
  MachineBasicBlock::iterator
  prepareInsertion(MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
                   MachineBasicBlock::iterator Last, bool PreferLast,
                   bool SaveSCC);
  // State-transition helpers: each inserts the EXEC manipulation before
  // \p Before and registers the new instruction with LiveIntervals.
  void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
               unsigned SaveWQM, unsigned LiveMaskReg);
  void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
             unsigned SavedWQM);
  void toWWM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
             unsigned SaveOrig);
  void fromWWM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
               unsigned SavedOrig);
  void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);

  void lowerLiveMaskQueries(unsigned LiveMaskReg);
  void lowerCopyInstrs();

public:
  static char ID;

  SIWholeQuadMode() :
    MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Whole Quad Mode"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // LiveIntervals is required so inserted instructions can be registered
    // with the slot indexes / live ranges.
    AU.addRequired<LiveIntervals>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
205
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000206} // end anonymous namespace
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000207
char SIWholeQuadMode::ID = 0;

// Register the pass with the LLVM pass registry, declaring its dependency on
// LiveIntervals.
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
                    false)

char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;

// Factory used by the AMDGPU target pass pipeline.
FunctionPass *llvm::createSIWholeQuadModePass() {
  return new SIWholeQuadMode;
}
221
// Dump the per-block and per-instruction analysis results to the debug
// stream (debug builds only; callers guard this with DEBUG()).
void SIWholeQuadMode::printInfo() {
  for (const auto &BII : Blocks) {
    dbgs() << "\nBB#" << BII.first->getNumber() << ":\n"
           << " InNeeds = " << PrintState(BII.second.InNeeds)
           << ", Needs = " << PrintState(BII.second.Needs)
           << ", OutNeeds = " << PrintState(BII.second.OutNeeds) << "\n\n";

    for (const MachineInstr &MI : *BII.first) {
      auto III = Instructions.find(&MI);
      // Skip instructions with no recorded analysis info.
      if (III == Instructions.end())
        continue;

      dbgs() << " " << MI << " Needs = " << PrintState(III->second.Needs)
             << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n';
    }
  }
}
239
// Record that \p MI needs the states in \p Flag (StateWQM and/or StateWWM),
// and queue it for backward propagation if that changed anything.
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
                                      std::vector<WorkItem> &Worklist) {
  InstrInfo &II = Instructions[&MI];

  // Only WQM/WWM may be requested here; Exact is handled at block level.
  assert(!(Flag & StateExact) && Flag != 0);

  // Remove any disabled states from the flag. The user that required it gets
  // an undefined value in the helper lanes. For example, this can happen if
  // the result of an atomic is used by instruction that requires WQM, where
  // ignoring the request for WQM is correct as per the relevant specs.
  Flag &= ~II.Disabled;

  // Ignore if the flag is already encompassed by the existing needs, or we
  // just disabled everything.
  if ((II.Needs & Flag) == Flag)
    return;

  II.Needs |= Flag;
  Worklist.push_back(&MI);
}
260
/// Mark all instructions defining the uses in \p MI with \p Flag.
void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
                                          std::vector<WorkItem> &Worklist) {
  for (const MachineOperand &Use : MI.uses()) {
    // Skip non-register operands and register defs that appear in uses().
    if (!Use.isReg() || !Use.isUse())
      continue;

    unsigned Reg = Use.getReg();

    // Handle physical registers that we need to track; this is mostly relevant
    // for VCC, which can appear as the (implicit) input of a uniform branch,
    // e.g. when a loop counter is stored in a VGPR.
    if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
      // EXEC itself is manipulated by this pass, so never propagate through it.
      if (Reg == AMDGPU::EXEC)
        continue;

      for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
        LiveRange &LR = LIS->getRegUnit(*RegUnit);
        // Find the value of this register unit live into MI; nothing to do if
        // the unit is dead here.
        const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
        if (!Value)
          continue;

        // Since we're in machine SSA, we do not need to track physical
        // registers across basic blocks.
        if (Value->isPHIDef())
          continue;

        markInstruction(*LIS->getInstructionFromIndex(Value->def), Flag,
                        Worklist);
      }

      continue;
    }

    // Virtual register: mark every defining instruction (machine SSA, so
    // normally a single def, possibly several for physreg-constrained cases).
    for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
      markInstruction(DefMI, Flag, Worklist);
  }
}
299
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
//
// Returns the union of all State* flags seen anywhere in the function, and
// seeds \p Worklist with the instructions/blocks whose requirements must be
// propagated.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                       std::vector<WorkItem> &Worklist) {
  char GlobalFlags = 0;
  bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
  SmallVector<MachineInstr *, 4> SetInactiveInstrs;

  // We need to visit the basic blocks in reverse post-order so that we visit
  // defs before uses, in particular so that we don't accidentally mark an
  // instruction as needing e.g. WQM before visiting it and realizing it needs
  // WQM disabled.
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
    MachineBasicBlock &MBB = **BI;
    BlockInfo &BBI = Blocks[&MBB];

    for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
      MachineInstr &MI = *II;
      InstrInfo &III = Instructions[&MI];
      unsigned Opcode = MI.getOpcode();
      char Flags = 0;

      // LDS access in a pixel shader needs the helper lanes' addresses, so
      // it must run in WQM.
      if (TII->isDS(Opcode) && CallingConv == CallingConv::AMDGPU_PS) {
        Flags = StateWQM;
      } else if (TII->isWQM(Opcode)) {
        // Sampling instructions don't need to produce results for all pixels
        // in a quad, they just require all inputs of a quad to have been
        // computed for derivatives.
        markInstructionUses(MI, StateWQM, Worklist);
        GlobalFlags |= StateWQM;
        continue;
      } else if (Opcode == AMDGPU::WQM) {
        // The WQM intrinsic requires its output to have all the helper lanes
        // correct, so we need it to be in WQM.
        Flags = StateWQM;
        LowerToCopyInstrs.push_back(&MI);
      } else if (Opcode == AMDGPU::WWM) {
        // The WWM intrinsic doesn't make the same guarantee, and plus it needs
        // to be executed in WQM or Exact so that its copy doesn't clobber
        // inactive lanes.
        markInstructionUses(MI, StateWWM, Worklist);
        GlobalFlags |= StateWWM;
        LowerToCopyInstrs.push_back(&MI);
        continue;
      } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
                 Opcode == AMDGPU::V_SET_INACTIVE_B64) {
        // set.inactive writes the inactive lanes, so it must itself never run
        // in WWM.
        III.Disabled = StateWWM;
        MachineOperand &Inactive = MI.getOperand(2);
        if (Inactive.isReg()) {
          if (Inactive.isUndef()) {
            // Undef inactive value: the instruction degenerates to a copy.
            LowerToCopyInstrs.push_back(&MI);
          } else {
            // The inactive-lane value must be computed with all lanes
            // enabled, i.e. in WWM.
            unsigned Reg = Inactive.getReg();
            if (TargetRegisterInfo::isVirtualRegister(Reg)) {
              for (MachineInstr &DefMI : MRI->def_instructions(Reg))
                markInstruction(DefMI, StateWWM, Worklist);
            }
          }
        }
        SetInactiveInstrs.push_back(&MI);
        continue;
      } else if (TII->isDisableWQM(MI)) {
        // Instructions with side effects (stores, atomics) must run Exact and
        // force the containing block to provide Exact mode.
        BBI.Needs |= StateExact;
        if (!(BBI.InNeeds & StateExact)) {
          BBI.InNeeds |= StateExact;
          Worklist.push_back(&MBB);
        }
        GlobalFlags |= StateExact;
        III.Disabled = StateWQM | StateWWM;
        continue;
      } else {
        if (Opcode == AMDGPU::SI_PS_LIVE) {
          // Live-mask query; rewritten later by lowerLiveMaskQueries().
          LiveMaskQueries.push_back(&MI);
        } else if (WQMOutputs) {
          // The function is in machine SSA form, which means that physical
          // VGPRs correspond to shader inputs and outputs. Inputs are
          // only used, outputs are only defined.
          for (const MachineOperand &MO : MI.defs()) {
            if (!MO.isReg())
              continue;

            unsigned Reg = MO.getReg();

            if (!TRI->isVirtualRegister(Reg) &&
                TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
              Flags = StateWQM;
              break;
            }
          }
        }

        if (!Flags)
          continue;
      }

      markInstruction(MI, Flags, Worklist);
      GlobalFlags |= Flags;
    }
  }

  // Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is
  // ever used anywhere in the function. This implements the corresponding
  // semantics of @llvm.amdgcn.set.inactive.
  if (GlobalFlags & StateWQM) {
    for (MachineInstr *MI : SetInactiveInstrs)
      markInstruction(*MI, StateWQM, Worklist);
  }

  return GlobalFlags;
}
411
// Propagate the state requirements of a single instruction backwards to the
// previous instruction, to the containing block, and to the instructions
// defining its inputs.
void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
                                           std::vector<WorkItem>& Worklist) {
  MachineBasicBlock *MBB = MI.getParent();
  InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
  BlockInfo &BI = Blocks[MBB];

  // Control flow-type instructions and stores to temporary memory that are
  // followed by WQM computations must themselves be in WQM.
  if ((II.OutNeeds & StateWQM) && !(II.Disabled & StateWQM) &&
      (MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
    // Update both the map entry and the local copy (recursive markInstruction
    // calls below may grow the map and invalidate references).
    Instructions[&MI].Needs = StateWQM;
    II.Needs = StateWQM;
  }

  // Propagate to block level
  if (II.Needs & StateWQM) {
    BI.Needs |= StateWQM;
    if (!(BI.InNeeds & StateWQM)) {
      BI.InNeeds |= StateWQM;
      Worklist.push_back(MBB);
    }
  }

  // Propagate backwards within block
  if (MachineInstr *PrevMI = MI.getPrevNode()) {
    // WWM is not propagated backwards implicitly (only explicit requests
    // via markInstructionUses), hence the ~StateWWM mask.
    char InNeeds = (II.Needs & ~StateWWM) | II.OutNeeds;
    if (!PrevMI->isPHI()) {
      InstrInfo &PrevII = Instructions[PrevMI];
      if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
        PrevII.OutNeeds |= InNeeds;
        Worklist.push_back(PrevMI);
      }
    }
  }

  // Propagate WQM flag to instruction inputs
  assert(!(II.Needs & StateExact));

  if (II.Needs != 0)
    markInstructionUses(MI, II.Needs, Worklist);
}
453
// Propagate a block's state requirements to its last instruction, its
// predecessors (which must provide our InNeeds) and its successors (which
// must accept our OutNeeds).
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
                                     std::vector<WorkItem>& Worklist) {
  BlockInfo BI = Blocks[&MBB]; // Make a copy to prevent dangling references.

  // Propagate through instructions
  if (!MBB.empty()) {
    MachineInstr *LastMI = &*MBB.rbegin();
    InstrInfo &LastII = Instructions[LastMI];
    if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
      LastII.OutNeeds |= BI.OutNeeds;
      Worklist.push_back(LastMI);
    }
  }

  // Predecessor blocks must provide for our WQM/Exact needs.
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    BlockInfo &PredBI = Blocks[Pred];
    if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
      continue;

    PredBI.OutNeeds |= BI.InNeeds;
    PredBI.InNeeds |= BI.InNeeds;
    Worklist.push_back(Pred);
  }

  // All successors must be prepared to accept the same set of WQM/Exact data.
  for (MachineBasicBlock *Succ : MBB.successors()) {
    BlockInfo &SuccBI = Blocks[Succ];
    if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
      continue;

    SuccBI.InNeeds |= BI.OutNeeds;
    Worklist.push_back(Succ);
  }
}
489
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000490char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000491 std::vector<WorkItem> Worklist;
492 char GlobalFlags = scanInstructions(MF, Worklist);
493
494 while (!Worklist.empty()) {
495 WorkItem WI = Worklist.back();
496 Worklist.pop_back();
497
498 if (WI.MI)
499 propagateInstruction(*WI.MI, Worklist);
500 else
501 propagateBlock(*WI.MBB, Worklist);
502 }
503
504 return GlobalFlags;
505}
506
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000507/// Whether \p MI really requires the exec state computed during analysis.
508///
509/// Scalar instructions must occasionally be marked WQM for correct propagation
510/// (e.g. thread masks leading up to branches), but when it comes to actual
511/// execution, they don't care about EXEC.
512bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
513 if (MI.isTerminator())
514 return true;
515
516 // Skip instructions that are not affected by EXEC
517 if (TII->isScalarUnit(MI))
518 return false;
519
520 // Generic instructions such as COPY will either disappear by register
521 // coalescing or be lowered to SALU or VALU instructions.
522 if (MI.isTransient()) {
523 if (MI.getNumExplicitOperands() >= 1) {
524 const MachineOperand &Op = MI.getOperand(0);
525 if (Op.isReg()) {
526 if (TRI->isSGPRReg(*MRI, Op.getReg())) {
527 // SGPR instructions are not affected by EXEC
528 return false;
529 }
530 }
531 }
532 }
533
534 return true;
535}
536
// Insert a save/restore pair for SCC around \p Before, so an EXEC-mask
// manipulation (which clobbers SCC) can be inserted between the two copies.
// Returns an iterator at the restore copy, i.e. the insertion point for the
// SCC-clobbering instruction.
MachineBasicBlock::iterator
SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator Before) {
  unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);

  MachineInstr *Save =
      BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
          .addReg(AMDGPU::SCC);
  MachineInstr *Restore =
      BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::SCC)
          .addReg(SaveReg);

  // Keep LiveIntervals up to date for the two new copies and the new vreg.
  LIS->InsertMachineInstrInMaps(*Save);
  LIS->InsertMachineInstrInMaps(*Restore);
  LIS->createAndComputeVirtRegInterval(SaveReg);

  return Restore;
}
555
// Return an iterator in the (inclusive) range [First, Last] at which
// instructions can be safely inserted, keeping in mind that some of the
// instructions we want to add necessarily clobber SCC.
MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
    MachineBasicBlock::iterator Last, bool PreferLast, bool SaveSCC) {
  // If SCC does not need to be preserved, any point in the range works.
  if (!SaveSCC)
    return PreferLast ? Last : First;

  LiveRange &LR = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
  auto MBBE = MBB.end();
  // An end() iterator stands for "end of block" in SlotIndex terms.
  SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
                                     : LIS->getMBBEndIdx(&MBB);
  SlotIndex LastIdx =
      Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
  SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
  const LiveRange::Segment *S;

  // Walk towards the preferred end of the range, stepping over live segments
  // of SCC, looking for a point where SCC is dead. If the loop exits with
  // S != nullptr, no such point exists inside the range.
  for (;;) {
    S = LR.getSegmentContaining(Idx);
    if (!S)
      break;

    if (PreferLast) {
      SlotIndex Next = S->start.getBaseIndex();
      if (Next < FirstIdx)
        break;
      Idx = Next;
    } else {
      SlotIndex Next = S->end.getNextIndex().getBaseIndex();
      if (Next > LastIdx)
        break;
      Idx = Next;
    }
  }

  MachineBasicBlock::iterator MBBI;

  if (MachineInstr *MI = LIS->getInstructionFromIndex(Idx))
    MBBI = MI;
  else {
    assert(Idx == LIS->getMBBEndIdx(&MBB));
    MBBI = MBB.end();
  }

  // SCC is live across the whole range: insert an explicit save/restore and
  // return the point between them.
  if (S)
    MBBI = saveSCC(MBB, MBBI);

  return MBBI;
}
606
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000607void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
608 MachineBasicBlock::iterator Before,
Nicolai Haehnlea56e6b62016-03-21 20:39:24 +0000609 unsigned SaveWQM, unsigned LiveMaskReg) {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000610 MachineInstr *MI;
611
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000612 if (SaveWQM) {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000613 MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
614 SaveWQM)
615 .addReg(LiveMaskReg);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000616 } else {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000617 MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
618 AMDGPU::EXEC)
619 .addReg(AMDGPU::EXEC)
620 .addReg(LiveMaskReg);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000621 }
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000622
623 LIS->InsertMachineInstrInMaps(*MI);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000624}
625
626void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
627 MachineBasicBlock::iterator Before,
Nicolai Haehnlea56e6b62016-03-21 20:39:24 +0000628 unsigned SavedWQM) {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000629 MachineInstr *MI;
630
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000631 if (SavedWQM) {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000632 MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
633 .addReg(SavedWQM);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000634 } else {
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000635 MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
636 AMDGPU::EXEC)
637 .addReg(AMDGPU::EXEC);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000638 }
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000639
640 LIS->InsertMachineInstrInMaps(*MI);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000641}
642
Connor Abbott92638ab2017-08-04 18:36:52 +0000643void SIWholeQuadMode::toWWM(MachineBasicBlock &MBB,
644 MachineBasicBlock::iterator Before,
645 unsigned SaveOrig) {
646 MachineInstr *MI;
647
648 assert(SaveOrig);
649 MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_OR_SAVEEXEC_B64),
650 SaveOrig)
651 .addImm(-1);
652 LIS->InsertMachineInstrInMaps(*MI);
653}
654
655void SIWholeQuadMode::fromWWM(MachineBasicBlock &MBB,
656 MachineBasicBlock::iterator Before,
657 unsigned SavedOrig) {
658 MachineInstr *MI;
659
660 assert(SavedOrig);
661 MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::EXIT_WWM), AMDGPU::EXEC)
662 .addReg(SavedOrig);
663 LIS->InsertMachineInstrInMaps(*MI);
664}
665
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000666void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
667 bool isEntry) {
668 auto BII = Blocks.find(&MBB);
669 if (BII == Blocks.end())
670 return;
671
672 const BlockInfo &BI = BII->second;
673
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000674 // This is a non-entry block that is WQM throughout, so no need to do
675 // anything.
Connor Abbott92638ab2017-08-04 18:36:52 +0000676 if (!isEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact)
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000677 return;
678
Nicolai Haehnle3bba6a82016-09-03 12:26:38 +0000679 DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");
680
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000681 unsigned SavedWQMReg = 0;
Connor Abbott92638ab2017-08-04 18:36:52 +0000682 unsigned SavedNonWWMReg = 0;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000683 bool WQMFromExec = isEntry;
Connor Abbott92638ab2017-08-04 18:36:52 +0000684 char State = (isEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
685 char NonWWMState = 0;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000686
687 auto II = MBB.getFirstNonPHI(), IE = MBB.end();
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000688 if (isEntry)
689 ++II; // Skip the instruction that saves LiveMask
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000690
Connor Abbott92638ab2017-08-04 18:36:52 +0000691 // This stores the first instruction where it's safe to switch from WQM to
692 // Exact or vice versa.
693 MachineBasicBlock::iterator FirstWQM = IE;
694
695 // This stores the first instruction where it's safe to switch from WWM to
696 // Exact/WQM or to switch to WWM. It must always be the same as, or after,
697 // FirstWQM since if it's safe to switch to/from WWM, it must be safe to
698 // switch to/from WQM as well.
699 MachineBasicBlock::iterator FirstWWM = IE;
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000700 for (;;) {
701 MachineBasicBlock::iterator Next = II;
Connor Abbott92638ab2017-08-04 18:36:52 +0000702 char Needs = StateExact | StateWQM; // WWM is disabled by default
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000703 char OutNeeds = 0;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000704
Connor Abbott92638ab2017-08-04 18:36:52 +0000705 if (FirstWQM == IE)
706 FirstWQM = II;
Nicolai Haehnle3bba6a82016-09-03 12:26:38 +0000707
Connor Abbott92638ab2017-08-04 18:36:52 +0000708 if (FirstWWM == IE)
709 FirstWWM = II;
710
711 // First, figure out the allowed states (Needs) based on the propagated
712 // flags.
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000713 if (II != IE) {
714 MachineInstr &MI = *II;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000715
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000716 if (requiresCorrectState(MI)) {
717 auto III = Instructions.find(&MI);
718 if (III != Instructions.end()) {
Connor Abbott92638ab2017-08-04 18:36:52 +0000719 if (III->second.Needs & StateWWM)
720 Needs = StateWWM;
721 else if (III->second.Needs & StateWQM)
Connor Abbottde068fe2017-08-04 18:36:50 +0000722 Needs = StateWQM;
723 else
724 Needs &= ~III->second.Disabled;
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000725 OutNeeds = III->second.OutNeeds;
726 }
Connor Abbott92638ab2017-08-04 18:36:52 +0000727 } else {
728 // If the instruction doesn't actually need a correct EXEC, then we can
729 // safely leave WWM enabled.
730 Needs = StateExact | StateWQM | StateWWM;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000731 }
732
Connor Abbottde068fe2017-08-04 18:36:50 +0000733 if (MI.isTerminator() && OutNeeds == StateExact)
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000734 Needs = StateExact;
735
736 if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
737 MI.getOperand(3).setImm(1);
738
739 ++Next;
740 } else {
741 // End of basic block
742 if (BI.OutNeeds & StateWQM)
743 Needs = StateWQM;
744 else if (BI.OutNeeds == StateExact)
745 Needs = StateExact;
Connor Abbottde068fe2017-08-04 18:36:50 +0000746 else
747 Needs = StateWQM | StateExact;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000748 }
Nicolai Haehnle3b572002016-07-28 11:39:24 +0000749
Connor Abbott92638ab2017-08-04 18:36:52 +0000750 // Now, transition if necessary.
Connor Abbottde068fe2017-08-04 18:36:50 +0000751 if (!(Needs & State)) {
Connor Abbott92638ab2017-08-04 18:36:52 +0000752 MachineBasicBlock::iterator First;
753 if (State == StateWWM || Needs == StateWWM) {
754 // We must switch to or from WWM
755 First = FirstWWM;
756 } else {
757 // We only need to switch to/from WQM, so we can use FirstWQM
758 First = FirstWQM;
759 }
760
Connor Abbottde068fe2017-08-04 18:36:50 +0000761 MachineBasicBlock::iterator Before =
762 prepareInsertion(MBB, First, II, Needs == StateWQM,
763 Needs == StateExact || WQMFromExec);
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000764
Connor Abbott92638ab2017-08-04 18:36:52 +0000765 if (State == StateWWM) {
766 assert(SavedNonWWMReg);
767 fromWWM(MBB, Before, SavedNonWWMReg);
768 State = NonWWMState;
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000769 }
770
Connor Abbott92638ab2017-08-04 18:36:52 +0000771 if (Needs == StateWWM) {
772 NonWWMState = State;
773 SavedNonWWMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
774 toWWM(MBB, Before, SavedNonWWMReg);
775 State = StateWWM;
776 } else {
777 if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
778 if (!WQMFromExec && (OutNeeds & StateWQM))
779 SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
780
781 toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
782 State = StateExact;
783 } else if (State == StateExact && (Needs & StateWQM) &&
784 !(Needs & StateExact)) {
785 assert(WQMFromExec == (SavedWQMReg == 0));
786
787 toWQM(MBB, Before, SavedWQMReg);
788
789 if (SavedWQMReg) {
790 LIS->createAndComputeVirtRegInterval(SavedWQMReg);
791 SavedWQMReg = 0;
792 }
793 State = StateWQM;
794 } else {
795 // We can get here if we transitioned from WWM to a non-WWM state that
796 // already matches our needs, but we shouldn't need to do anything.
797 assert(Needs & State);
798 }
799 }
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000800 }
801
Connor Abbott92638ab2017-08-04 18:36:52 +0000802 if (Needs != (StateExact | StateWQM | StateWWM)) {
803 if (Needs != (StateExact | StateWQM))
804 FirstWQM = IE;
805 FirstWWM = IE;
806 }
Connor Abbottde068fe2017-08-04 18:36:50 +0000807
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000808 if (II == IE)
809 break;
810 II = Next;
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000811 }
812}
813
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000814void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
815 for (MachineInstr *MI : LiveMaskQueries) {
Matt Arsenault8dff86d2016-07-13 05:55:15 +0000816 const DebugLoc &DL = MI->getDebugLoc();
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000817 unsigned Dest = MI->getOperand(0).getReg();
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000818 MachineInstr *Copy =
819 BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
820 .addReg(LiveMaskReg);
821
822 LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000823 MI->eraseFromParent();
824 }
825}
826
Connor Abbott8c217d02017-08-04 18:36:49 +0000827void SIWholeQuadMode::lowerCopyInstrs() {
Connor Abbott66b9bd62017-08-04 18:36:54 +0000828 for (MachineInstr *MI : LowerToCopyInstrs) {
829 for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
830 MI->RemoveOperand(i);
Connor Abbott8c217d02017-08-04 18:36:49 +0000831 MI->setDesc(TII->get(AMDGPU::COPY));
Connor Abbott66b9bd62017-08-04 18:36:54 +0000832 }
Connor Abbott8c217d02017-08-04 18:36:49 +0000833}
834
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000835bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000836 Instructions.clear();
837 Blocks.clear();
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000838 LiveMaskQueries.clear();
Connor Abbott8c217d02017-08-04 18:36:49 +0000839 LowerToCopyInstrs.clear();
Connor Abbott92638ab2017-08-04 18:36:52 +0000840 CallingConv = MF.getFunction()->getCallingConv();
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000841
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000842 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
843
844 TII = ST.getInstrInfo();
845 TRI = &TII->getRegisterInfo();
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000846 MRI = &MF.getRegInfo();
Nicolai Haehnlebef0e902016-08-02 19:17:37 +0000847 LIS = &getAnalysis<LiveIntervals>();
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000848
849 char GlobalFlags = analyzeFunction(MF);
Connor Abbott92638ab2017-08-04 18:36:52 +0000850 unsigned LiveMaskReg = 0;
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000851 if (!(GlobalFlags & StateWQM)) {
852 lowerLiveMaskQueries(AMDGPU::EXEC);
Connor Abbott92638ab2017-08-04 18:36:52 +0000853 if (!(GlobalFlags & StateWWM))
854 return !LiveMaskQueries.empty();
855 } else {
856 // Store a copy of the original live mask when required
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000857 MachineBasicBlock &Entry = MF.front();
858 MachineBasicBlock::iterator EntryMI = Entry.getFirstNonPHI();
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000859
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000860 if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
861 LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000862 MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
863 TII->get(AMDGPU::COPY), LiveMaskReg)
864 .addReg(AMDGPU::EXEC);
865 LIS->InsertMachineInstrInMaps(*MI);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000866 }
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000867
Connor Abbott92638ab2017-08-04 18:36:52 +0000868 lowerLiveMaskQueries(LiveMaskReg);
869
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000870 if (GlobalFlags == StateWQM) {
871 // For a shader that needs only WQM, we can just set it once.
872 BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
873 AMDGPU::EXEC)
874 .addReg(AMDGPU::EXEC);
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000875
Connor Abbott8c217d02017-08-04 18:36:49 +0000876 lowerCopyInstrs();
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000877 // EntryMI may become invalid here
878 return true;
879 }
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000880 }
881
Nicolai Haehnle3bba6a82016-09-03 12:26:38 +0000882 DEBUG(printInfo());
883
Connor Abbott8c217d02017-08-04 18:36:49 +0000884 lowerCopyInstrs();
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000885
Nicolai Haehnleb0c97482016-04-22 04:04:08 +0000886 // Handle the general case
Matt Arsenault8dff86d2016-07-13 05:55:15 +0000887 for (auto BII : Blocks)
888 processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000889
Nicolai Haehnlee58e0e32016-09-12 16:25:20 +0000890 // Physical registers like SCC aren't tracked by default anyway, so just
891 // removing the ranges we computed is the simplest option for maintaining
892 // the analysis results.
893 LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
894
Nicolai Haehnle213e87f2016-03-21 20:28:33 +0000895 return true;
896}