AMDGPU: Propagate undef flag during pre-RA exec mask optimizations
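Summary: When the select/compare/and sequence is folded into an ANDN2 instruction, carry the undef flag of the condition-code operand over to the new instruction's register use instead of always passing no flags. Issue: https://github.com/GPUOpen-Drivers/llpc/issues/204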
Reviewers: arsenm, rampitec
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D68184
llvm-svn: 374041
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 681c3b3..fdd30db 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -250,15 +250,16 @@
Op1->getImm() != 0 || Op2->getImm() != 1)
return AMDGPU::NoRegister;
- LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
- << *Cmp << '\t' << *And);
+ LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
+ << *And);
Register CCReg = CC->getReg();
LIS->RemoveMachineInstrFromMaps(*And);
- MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
- TII->get(Andn2Opc), And->getOperand(0).getReg())
- .addReg(ExecReg)
- .addReg(CCReg, 0, CC->getSubReg());
+ MachineInstr *Andn2 =
+ BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
+ And->getOperand(0).getReg())
+ .addReg(ExecReg)
+ .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());
And->eraseFromParent();
LIS->InsertMachineInstrInMaps(*Andn2);