AMDGPU: Add SIWholeQuadMode pass
Summary:
Whole quad mode is already enabled for pixel shaders that compute
derivatives, but it must be suspended for instructions that cause a
shader to have side effects (i.e. stores and atomics).
This pass addresses the issue by storing the real (initial) live mask
in a register, masking EXEC before instructions that require exact
execution and (re-)enabling WQM where required.
This pass is run before register coalescing so that we can use
machine SSA for analysis.
The changes in this patch expose a problem with the second machine
scheduling pass: target independent instructions like COPY implicitly
use EXEC when they operate on VGPRs, but this fact is not encoded in
the MIR. This can lead to miscompilation because instructions are
moved past changes to EXEC.
This patch fixes the problem by adding use-implicit operands to
target independent instructions. Some general codegen passes are
relaxed to work with such implicit use operands.
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: MatzeB, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D18162
llvm-svn: 263982
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index a804a5e..c4a06ef 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -78,7 +78,7 @@
void SkipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
- void Else(MachineInstr &MI);
+ void Else(MachineInstr &MI, bool ExecModified);
void Break(MachineInstr &MI);
void IfBreak(MachineInstr &MI);
void ElseBreak(MachineInstr &MI);
@@ -215,7 +215,7 @@
MI.eraseFromParent();
}
-void SILowerControlFlow::Else(MachineInstr &MI) {
+void SILowerControlFlow::Else(MachineInstr &MI, bool ExecModified) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
@@ -225,6 +225,15 @@
TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
.addReg(Src); // Saved EXEC
+ if (ExecModified) {
+ // Adjust the saved exec to account for the modifications during the flow
+ // block that contains the ELSE. This can happen when WQM mode is switched
+ // off.
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+ }
+
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(Dst);
@@ -488,7 +497,6 @@
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
- bool NeedWQM = false;
bool NeedFlat = false;
unsigned Depth = 0;
@@ -498,17 +506,24 @@
MachineBasicBlock *EmptyMBBAtEnd = NULL;
MachineBasicBlock &MBB = *BI;
MachineBasicBlock::iterator I, Next;
+ bool ExecModified = false;
+
for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (TII->isWQM(MI) || TII->isDS(MI))
- NeedWQM = true;
// Flat uses m0 in case it needs to access LDS.
if (TII->isFLAT(MI))
NeedFlat = true;
+ for (const auto &Def : I->defs()) {
+ if (Def.isReg() && Def.isDef() && Def.getReg() == AMDGPU::EXEC) {
+ ExecModified = true;
+ break;
+ }
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -517,7 +532,7 @@
break;
case AMDGPU::SI_ELSE:
- Else(MI);
+ Else(MI, ExecModified);
break;
case AMDGPU::SI_BREAK:
@@ -599,12 +614,6 @@
}
}
- if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
- MachineBasicBlock &MBB = MF.front();
- BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC).addReg(AMDGPU::EXEC);
- }
-
if (NeedFlat && MFI->IsKernel) {
// TODO: What to use with function calls?
// We will need to Initialize the flat scratch register pair.