R600: Don't emit empty then clause and use alu_pop_after
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186725 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 85ac725..fac56f0 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -1039,8 +1039,11 @@
} else if (FalseMBB->succ_size() == 1
&& *FalseMBB->succ_begin() == TrueMBB) {
// Triangle pattern, true is empty
- LandBlk = TrueMBB;
- TrueMBB = NULL;
+ // We reverse the predicate to make a triangle, empty false pattern;
+ std::swap(TrueMBB, FalseMBB);
+ reversePredicateSetter(MBB->end());
+ LandBlk = FalseMBB;
+ FalseMBB = NULL;
} else if (FalseMBB->succ_size() == 1
&& isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
LandBlk = *FalseMBB->succ_begin();
@@ -1456,6 +1459,7 @@
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
+ assert (TrueMBB);
DEBUG(
dbgs() << "ifPattern BB" << MBB->getNumber();
dbgs() << "{ ";
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 1cd0ac3..b69d38b 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -347,6 +347,9 @@
MaxStack = 1;
}
std::vector<ClauseFile> FetchClauses, AluClauses;
+ std::vector<MachineInstr *> LastAlu(1);
+ std::vector<MachineInstr *> ToPopAfter;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E;) {
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
@@ -357,6 +360,10 @@
}
MachineBasicBlock::iterator MI = I;
+ if (MI->getOpcode() != AMDGPU::ENDIF)
+ LastAlu.back() = 0;
+ if (MI->getOpcode() == AMDGPU::CF_ALU)
+ LastAlu.back() = MI;
I++;
switch (MI->getOpcode()) {
case AMDGPU::CF_ALU_PUSH_BEFORE:
@@ -403,6 +410,7 @@
break;
}
case AMDGPU::IF_PREDICATE_SET: {
+ LastAlu.push_back(0);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
@@ -420,7 +428,7 @@
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_ELSE))
.addImm(0)
- .addImm(1);
+ .addImm(0);
DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
IfThenElseStack.push_back(MIb);
MI->eraseFromParent();
@@ -429,17 +437,24 @@
}
case AMDGPU::ENDIF: {
CurrentStack--;
+ if (LastAlu.back()) {
+ ToPopAfter.push_back(LastAlu.back());
+ } else {
+ MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+ getHWInstrDesc(CF_POP))
+ .addImm(CfCount + 1)
+ .addImm(1);
+ (void)MIb;
+ DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CfCount++;
+ }
+
MachineInstr *IfOrElseInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
- CounterPropagateAddr(IfOrElseInst, CfCount + 1);
- MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
- getHWInstrDesc(CF_POP))
- .addImm(CfCount + 1)
- .addImm(1);
- (void)MIb;
- DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+ CounterPropagateAddr(IfOrElseInst, CfCount);
+ IfOrElseInst->getOperand(1).setImm(1);
+ LastAlu.pop_back();
MI->eraseFromParent();
- CfCount++;
break;
}
case AMDGPU::PREDICATED_BREAK: {
@@ -484,6 +499,21 @@
break;
}
}
+ for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
+ MachineInstr *Alu = ToPopAfter[i];
+ BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
+ TII->get(AMDGPU::CF_ALU_POP_AFTER))
+ .addImm(Alu->getOperand(0).getImm())
+ .addImm(Alu->getOperand(1).getImm())
+ .addImm(Alu->getOperand(2).getImm())
+ .addImm(Alu->getOperand(3).getImm())
+ .addImm(Alu->getOperand(4).getImm())
+ .addImm(Alu->getOperand(5).getImm())
+ .addImm(Alu->getOperand(6).getImm())
+ .addImm(Alu->getOperand(7).getImm())
+ .addImm(Alu->getOperand(8).getImm());
+ Alu->eraseFromParent();
+ }
MFI->StackSize = getHWStackSize(MaxStack, HasPush);
}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index df5c438..3652c89 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -624,6 +624,7 @@
def CF_ALU : ALU_CLAUSE<8, "ALU">;
def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
+def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
def FETCH_CLAUSE : AMDGPUInst <(outs),
(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {