R600: Make sure to schedule AR register uses and defs in the same clause
Reviewed-by: vljn at ovi.com
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183294 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 2a4a245..d915f40 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -816,7 +816,8 @@
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
AddrReg, ValueReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
setImmOperand(Mov, R600Operands::DST_REL, 1);
return Mov;
}
@@ -833,7 +834,8 @@
MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
ValueReg,
AddrReg)
- .addReg(AMDGPU::AR_X, RegState::Implicit);
+ .addReg(AMDGPU::AR_X,
+ RegState::Implicit | RegState::Kill);
setImmOperand(Mov, R600Operands::SRC0_REL, 1);
return Mov;
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 8d61b8c..9469e0f 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -59,8 +59,16 @@
bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
(!Available[IDFetch].empty() || !Available[IDOther].empty());
- if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
- (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
+ // We want to schedule AR defs as soon as possible to make sure they aren't
+ // put in a different ALU clause from their uses.
+ if (!SU && !UnscheduledARDefs.empty()) {
+ SU = UnscheduledARDefs[0];
+ UnscheduledARDefs.erase(UnscheduledARDefs.begin());
+ NextInstKind = IDAlu;
+ }
+
+ if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
+ (!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
// try to pick ALU
SU = pickAlu();
if (SU) {
@@ -84,6 +92,15 @@
NextInstKind = IDOther;
}
+ // We want to schedule the AR uses as late as possible to make sure that
+ // the AR defs have been released.
+ if (!SU && !UnscheduledARUses.empty()) {
+ SU = UnscheduledARUses[0];
+ UnscheduledARUses.erase(UnscheduledARUses.begin());
+ NextInstKind = IDAlu;
+ }
+
+
DEBUG(
if (SU) {
dbgs() << " ** Pick node **\n";
@@ -149,6 +166,21 @@
DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
int IK = getInstKind(SU);
+
+ // Check for AR register defines
+ for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
+ E = SU->getInstr()->operands_end();
+ I != E; ++I) {
+ if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
+ if (I->isDef()) {
+ UnscheduledARDefs.push_back(SU);
+ } else {
+ UnscheduledARUses.push_back(SU);
+ }
+ return;
+ }
+ }
+
// There is no export clause, we can schedule one as soon as its ready
if (IK == IDOther)
Available[IDOther].push_back(SU);
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index 814ae9e..4dedf70 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -52,6 +52,8 @@
std::vector<SUnit *> Available[IDLast], Pending[IDLast];
std::vector<SUnit *> AvailableAlus[AluLast];
+ std::vector<SUnit *> UnscheduledARDefs;
+ std::vector<SUnit *> UnscheduledARUses;
InstKind CurInstKind;
int CurEmitted;