//===-- SIWholeQuadMode.cpp - enter and suspend whole quad mode -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass adds instructions to enable whole quad mode for pixel
/// shaders.
///
/// Whole quad mode is required for derivative computations, but it interferes
/// with shader side effects (stores and atomics). This pass is run on the
/// scheduled machine IR but before register coalescing, so that machine SSA is
/// available for analysis. It ensures that WQM is enabled when necessary, but
/// disabled around stores and atomics.
///
/// When necessary, this pass creates a function prolog
///
///   S_MOV_B64 LiveMask, EXEC
///   S_WQM_B64 EXEC, EXEC
///
/// to enter WQM at the top of the function and surrounds blocks of Exact
/// instructions by
///
///   S_AND_SAVEEXEC_B64 Tmp, LiveMask
///   ...
///   S_MOV_B64 EXEC, Tmp
///
/// In order to avoid excessive switching during sequences of Exact
/// instructions, the pass first analyzes which instructions must be run in WQM
/// (aka which instructions produce values that lead to derivative
/// computations).
///
/// Basic blocks are always exited in WQM as long as some successor needs WQM.
///
/// There is room for improvement given better control flow analysis:
///
///  (1) at the top level (outside of control flow statements, and as long as
///      kill hasn't been used), one SGPR can be saved by recovering WQM from
///      the LiveMask (this is implemented for the entry block).
///
///  (2) when entire regions (e.g. if-else blocks or entire loops) only
///      consist of exact and don't-care instructions, the switch only has to
///      be done at the entry and exit points rather than potentially in each
///      block of the region.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"

using namespace llvm;

#define DEBUG_TYPE "si-wqm"

namespace {

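// Execution mask states: an instruction or block may require whole quad mode
// (WQM), the exact mask of live lanes (Exact), or neither. The flags are
// combined as a bitmask during the analysis below.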
enum {
  StateWQM = 0x1,
  StateExact = 0x2,
};

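// Per-instruction analysis results: Needs is the state this instruction itself
// must run in; OutNeeds is the union of states required by instructions that
// execute after it.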
struct InstrInfo {
  char Needs = 0;
  char OutNeeds = 0;
};

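// Per-block analysis results: Needs is the union of the states required by the
// block's instructions; InNeeds collects the states that must be available
// when the block is entered, OutNeeds the states that must be available when
// it is left.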
struct BlockInfo {
  char Needs = 0;
  char InNeeds = 0;
  char OutNeeds = 0;
};

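// A worklist entry for the propagation: holds either a basic block or a single
// instruction whose flags may still need to be propagated.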
struct WorkItem {
  const MachineBasicBlock *MBB = nullptr;
  const MachineInstr *MI = nullptr;

  WorkItem() {}
  WorkItem(const MachineBasicBlock *MBB) : MBB(MBB) {}
  WorkItem(const MachineInstr *MI) : MI(MI) {}
};

class SIWholeQuadMode : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;

  DenseMap<const MachineInstr *, InstrInfo> Instructions;
  DenseMap<const MachineBasicBlock *, BlockInfo> Blocks;
  SmallVector<const MachineInstr *, 2> ExecExports;
  SmallVector<MachineInstr *, 1> LiveMaskQueries;

  char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
  void propagateInstruction(const MachineInstr &MI,
                            std::vector<WorkItem> &Worklist);
  void propagateBlock(const MachineBasicBlock &MBB,
                      std::vector<WorkItem> &Worklist);
  char analyzeFunction(MachineFunction &MF);

  void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
               unsigned SaveWQM, unsigned LiveMaskReg);
  void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
             unsigned SavedWQM);
  void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);

  void lowerLiveMaskQueries(unsigned LiveMaskReg);

public:
  static char ID;

  SIWholeQuadMode() :
    MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Whole Quad Mode";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

char SIWholeQuadMode::ID = 0;

INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE,
                      "SI Whole Quad Mode", false, false)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE,
                    "SI Whole Quad Mode", false, false)

char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;

FunctionPass *llvm::createSIWholeQuadModePass() {
  return new SIWholeQuadMode;
}

// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                       std::vector<WorkItem> &Worklist) {
  char GlobalFlags = 0;

  for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;

    for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
      MachineInstr &MI = *II;
      unsigned Opcode = MI.getOpcode();
      char Flags;

      if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
        Flags = StateWQM;
      } else if (TII->get(Opcode).mayStore() &&
                 (MI.getDesc().TSFlags & SIInstrFlags::VM_CNT)) {
        Flags = StateExact;
      } else {
        // Handle export instructions with the exec mask valid flag set
        if (Opcode == AMDGPU::EXP) {
          if (MI.getOperand(4).getImm() != 0)
            ExecExports.push_back(&MI);
        } else if (Opcode == AMDGPU::SI_PS_LIVE) {
          LiveMaskQueries.push_back(&MI);
        }

        continue;
      }

      Instructions[&MI].Needs = Flags;
      Worklist.push_back(&MI);
      GlobalFlags |= Flags;
    }
  }

  return GlobalFlags;
}

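// Propagate the requirements of a single instruction: mark control flow that
// feeds WQM computations as WQM itself, merge the flags into the containing
// block, push them to the previous instruction in the block, and flag the
// definitions of all virtual register inputs when the instruction needs WQM.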
void SIWholeQuadMode::propagateInstruction(const MachineInstr &MI,
                                           std::vector<WorkItem> &Worklist) {
  const MachineBasicBlock &MBB = *MI.getParent();
  InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
  BlockInfo &BI = Blocks[&MBB];

  // Control flow-type instructions that are followed by WQM computations
  // must themselves be in WQM.
  if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) &&
      (MI.isBranch() || MI.isTerminator() ||
       MI.getOpcode() == AMDGPU::SI_KILL)) {
    Instructions[&MI].Needs = StateWQM;
    II.Needs = StateWQM;
  }

  // Propagate to block level
  BI.Needs |= II.Needs;
  if ((BI.InNeeds | II.Needs) != BI.InNeeds) {
    BI.InNeeds |= II.Needs;
    Worklist.push_back(&MBB);
  }

  // Propagate backwards within block
  if (const MachineInstr *PrevMI = MI.getPrevNode()) {
    char InNeeds = II.Needs | II.OutNeeds;
    if (!PrevMI->isPHI()) {
      InstrInfo &PrevII = Instructions[PrevMI];
      if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
        PrevII.OutNeeds |= InNeeds;
        Worklist.push_back(PrevMI);
      }
    }
  }

  // Propagate WQM flag to instruction inputs
  assert(II.Needs != (StateWQM | StateExact));
  if (II.Needs != StateWQM)
    return;

  for (const MachineOperand &Use : MI.uses()) {
    if (!Use.isReg() || !Use.isUse())
      continue;

    // At this point, physical registers appear as inputs or outputs
    // and following them makes no sense (and would in fact be incorrect
    // when the same VGPR is used as both an output and an input that leads
    // to a NeedsWQM instruction).
    //
    // Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
    // have to trace this, in practice it happens for 64-bit computations like
    // pointers where both dwords are followed already anyway.
    if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
      continue;

    for (const MachineOperand &Def : MRI->def_operands(Use.getReg())) {
      const MachineInstr *DefMI = Def.getParent();
      InstrInfo &DefII = Instructions[DefMI];

      // Obviously skip if DefMI is already flagged as NeedWQM.
      //
      // The instruction might also be flagged as NeedExact. This happens when
      // the result of an atomic is used in a WQM computation. In this case,
      // the atomic must not run for helper pixels and the WQM result is
      // undefined.
      if (DefII.Needs != 0)
        continue;

      DefII.Needs = StateWQM;
      Worklist.push_back(DefMI);
    }
  }
}

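// Propagate a block's requirements: push OutNeeds into its last instruction,
// require predecessors to provide the block's InNeeds on exit, and require
// successors to accept its OutNeeds on entry.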
void SIWholeQuadMode::propagateBlock(const MachineBasicBlock &MBB,
                                     std::vector<WorkItem> &Worklist) {
  BlockInfo BI = Blocks[&MBB]; // take a copy to prevent dangling references

  // Propagate through instructions
  if (!MBB.empty()) {
    const MachineInstr *LastMI = &*MBB.rbegin();
    InstrInfo &LastII = Instructions[LastMI];
    if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
      LastII.OutNeeds |= BI.OutNeeds;
      Worklist.push_back(LastMI);
    }
  }

  // Predecessor blocks must provide for our WQM/Exact needs.
  for (const MachineBasicBlock *Pred : MBB.predecessors()) {
    BlockInfo &PredBI = Blocks[Pred];
    if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
      continue;

    PredBI.OutNeeds |= BI.InNeeds;
    PredBI.InNeeds |= BI.InNeeds;
    Worklist.push_back(Pred);
  }

  // All successors must be prepared to accept the same set of WQM/Exact
  // data.
  for (const MachineBasicBlock *Succ : MBB.successors()) {
    BlockInfo &SuccBI = Blocks[Succ];
    if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
      continue;

    SuccBI.InNeeds |= BI.OutNeeds;
    Worklist.push_back(Succ);
  }
}

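// Seed the worklist by scanning all instructions, then iterate the propagation
// steps to a fixed point. Returns the union of states required anywhere in the
// function.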
char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
  std::vector<WorkItem> Worklist;
  char GlobalFlags = scanInstructions(MF, Worklist);

  while (!Worklist.empty()) {
    WorkItem WI = Worklist.back();
    Worklist.pop_back();

    if (WI.MI)
      propagateInstruction(*WI.MI, Worklist);
    else
      propagateBlock(*WI.MBB, Worklist);
  }

  return GlobalFlags;
}

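// Switch EXEC to Exact mode by ANDing it with the live mask. If SaveWQM is a
// valid register, the previous (WQM) exec mask is saved into it so that it can
// be restored later.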
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator Before,
                              unsigned SaveWQM, unsigned LiveMaskReg) {
  if (SaveWQM) {
    BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
            SaveWQM)
        .addReg(LiveMaskReg);
  } else {
    BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
            AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC)
        .addReg(LiveMaskReg);
  }
}

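// Switch EXEC back to WQM: either restore the exec mask saved by toExact, or
// recompute whole quad mode from the current exec mask.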
void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator Before,
                            unsigned SavedWQM) {
  if (SavedWQM) {
    BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
        .addReg(SavedWQM);
  } else {
    BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
            AMDGPU::EXEC)
        .addReg(AMDGPU::EXEC);
  }
}

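// Insert the state switches needed within a single basic block, based on the
// per-instruction requirements collected by the analysis.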
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
                                   bool isEntry) {
  auto BII = Blocks.find(&MBB);
  if (BII == Blocks.end())
    return;

  const BlockInfo &BI = BII->second;

  if (!(BI.InNeeds & StateWQM))
    return;

  // This is a non-entry block that is WQM throughout, so no need to do
  // anything.
  if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
    return;

  unsigned SavedWQMReg = 0;
  bool WQMFromExec = isEntry;
  char State = isEntry ? StateExact : StateWQM;

  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
  while (II != IE) {
    MachineInstr &MI = *II;
    ++II;

    // Skip instructions that are not affected by EXEC
    if (MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD) &&
        !MI.isBranch() && !MI.isTerminator())
      continue;

    // Generic instructions such as COPY will either disappear by register
    // coalescing or be lowered to SALU or VALU instructions.
    if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
      if (MI.getNumExplicitOperands() >= 1) {
        const MachineOperand &Op = MI.getOperand(0);
        if (Op.isReg()) {
          if (TRI->isSGPRReg(*MRI, Op.getReg())) {
            // SGPR instructions are not affected by EXEC
            continue;
          }
        }
      }
    }

    char Needs = 0;
    char OutNeeds = 0;
    auto InstrInfoIt = Instructions.find(&MI);
    if (InstrInfoIt != Instructions.end()) {
      Needs = InstrInfoIt->second.Needs;
      OutNeeds = InstrInfoIt->second.OutNeeds;

      // Make sure to switch to Exact mode before the end of the block when
      // Exact and only Exact is needed further downstream.
      if (OutNeeds == StateExact && (MI.isBranch() || MI.isTerminator())) {
        assert(Needs == 0);
        Needs = StateExact;
      }
    }

    // State switching
    if (Needs && State != Needs) {
      if (Needs == StateExact) {
        assert(!SavedWQMReg);

        if (!WQMFromExec && (OutNeeds & StateWQM))
          SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

        toExact(MBB, &MI, SavedWQMReg, LiveMaskReg);
      } else {
        assert(WQMFromExec == (SavedWQMReg == 0));
        toWQM(MBB, &MI, SavedWQMReg);
        SavedWQMReg = 0;
      }

      State = Needs;
    }

    if (MI.getOpcode() == AMDGPU::SI_KILL)
      WQMFromExec = false;
  }

  if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
    assert(WQMFromExec == (SavedWQMReg == 0));
    toWQM(MBB, MBB.end(), SavedWQMReg);
  } else if (BI.OutNeeds == StateExact && State != StateExact) {
    toExact(MBB, MBB.end(), 0, LiveMaskReg);
  }
}

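// Replace every SI_PS_LIVE pseudo with a copy of the live mask, i.e. the exec
// mask as it was on function entry.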
void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
  for (MachineInstr *MI : LiveMaskQueries) {
    DebugLoc DL = MI->getDebugLoc();
    unsigned Dest = MI->getOperand(0).getReg();
    BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
        .addReg(LiveMaskReg);
    MI->eraseFromParent();
  }
}

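// Pass entry point. Only pixel shaders can require whole quad mode; all other
// calling conventions are left untouched.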
bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getFunction()->getCallingConv() != CallingConv::AMDGPU_PS)
    return false;

  Instructions.clear();
  Blocks.clear();
  ExecExports.clear();
  LiveMaskQueries.clear();

  TII = static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const SIRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MRI = &MF.getRegInfo();

  char GlobalFlags = analyzeFunction(MF);
  if (!(GlobalFlags & StateWQM)) {
    lowerLiveMaskQueries(AMDGPU::EXEC);
    return !LiveMaskQueries.empty();
  }

  // Store a copy of the original live mask when required
  MachineBasicBlock &Entry = MF.front();
  MachineInstr *EntryMI = Entry.getFirstNonPHI();
  unsigned LiveMaskReg = 0;

  if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
    LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
        .addReg(AMDGPU::EXEC);
  }

  if (GlobalFlags == StateWQM) {
    // For a shader that needs only WQM, we can just set it once.
    BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
            AMDGPU::EXEC).addReg(AMDGPU::EXEC);

    lowerLiveMaskQueries(LiveMaskReg);
    // EntryMI may become invalid here
    return true;
  }

  lowerLiveMaskQueries(LiveMaskReg);
  EntryMI = nullptr;

  // Handle the general case
  for (const auto &BII : Blocks)
    processBlock(const_cast<MachineBasicBlock &>(*BII.first), LiveMaskReg,
                 BII.first == &*MF.begin());

  return true;
}