Revert "R600: Non vector only instruction can be scheduled on trans unit"
This reverts commit 98ce62780ea7185ba710868bf83c8077e8d7f6d6.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187526 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 2fd9300..2e9b732 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -459,9 +459,6 @@
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
const std::vector<std::pair<int, unsigned> > &TransOps,
unsigned ConstCount) {
- // TransALU can't read 3 constants
- if (ConstCount > 2)
- return false;
for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransOps[i];
unsigned Cycle = getTransSwizzle(TransSwz, i);
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 0499dd5..0dc0365 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -9,6 +9,7 @@
//
/// \file
/// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//
@@ -28,7 +29,6 @@
DAG = dag;
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
- VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
@@ -342,16 +342,14 @@
}
}
-SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
if (Q.empty())
return NULL;
for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
It != E; ++It) {
SUnit *SU = *It;
InstructionsGroupCandidate.push_back(SU->getInstr());
- if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
- && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
- ) {
+ if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
InstructionsGroupCandidate.pop_back();
Q.erase((It + 1).base());
return SU;
@@ -375,8 +373,6 @@
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
-// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
-// OccupedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -413,12 +409,12 @@
}
}
-SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
- SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
if (SlotedSU)
return SlotedSU;
- SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
if (UnslotedSU)
AssignSlot(UnslotedSU->getInstr(), Slot);
return UnslotedSU;
@@ -438,35 +434,30 @@
// Bottom up scheduling : predX must comes first
if (!AvailableAlus[AluPredX].empty()) {
OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluPredX], false);
+ return PopInst(AvailableAlus[AluPredX]);
}
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluDiscarded], false);
+ return PopInst(AvailableAlus[AluDiscarded]);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
OccupedSlotsMask |= 15;
- return PopInst(AvailableAlus[AluT_XYZW], false);
+ return PopInst(AvailableAlus[AluT_XYZW]);
}
}
bool TransSlotOccuped = OccupedSlotsMask & 16;
- if (!TransSlotOccuped && VLIW5) {
+ if (!TransSlotOccuped) {
if (!AvailableAlus[AluTrans].empty()) {
OccupedSlotsMask |= 16;
- return PopInst(AvailableAlus[AluTrans], false);
- }
- SUnit *SU = AttemptFillSlot(3, true);
- if (SU) {
- OccupedSlotsMask |= 16;
- return SU;
+ return PopInst(AvailableAlus[AluTrans]);
}
}
for (int Chan = 3; Chan > -1; --Chan) {
bool isOccupied = OccupedSlotsMask & (1 << Chan);
if (!isOccupied) {
- SUnit *SU = AttemptFillSlot(Chan, false);
+ SUnit *SU = AttemptFillSlot(Chan);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate.push_back(SU->getInstr());
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index 0a6f120..f8965d8 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -84,16 +84,15 @@
private:
std::vector<MachineInstr *> InstructionsGroupCandidate;
- bool VLIW5;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const;
void LoadAlu();
unsigned AvailablesAluCount() const;
- SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
+ SUnit *AttemptFillSlot (unsigned Slot);
void PrepareNextSlot();
- SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
+ SUnit *PopInst(std::vector<SUnit*> &Q);
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 6c70052..5cf1fd3 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -58,8 +58,6 @@
private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
- bool VLIW5;
- bool ConsideredInstUsesAlreadyWrittenVectorElement;
unsigned getSlot(const MachineInstr *MI) const {
return TRI.getHWRegChan(MI->getOperand(0).getReg());
@@ -76,13 +74,7 @@
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
if (I->isBundle())
BI++;
- int LastDstChan = -1;
do {
- bool isTrans = false;
- int BISlot = getSlot(BI);
- if (LastDstChan >= BISlot)
- isTrans = true;
- LastDstChan = BISlot;
if (TII->isPredicated(BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
@@ -93,7 +85,7 @@
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
- if (isTrans || TII->isTransOnly(BI)) {
+ if (TII->isTransOnly(BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
@@ -150,14 +142,10 @@
MachineDominatorTree &MDT)
: VLIWPacketizerList(MF, MLI, MDT, true),
TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
- TRI(TII->getRegisterInfo()) {
- VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
- }
+ TRI(TII->getRegisterInfo()) { }
// initPacketizerState - initialize some internal flags.
- void initPacketizerState() {
- ConsideredInstUsesAlreadyWrittenVectorElement = false;
- }
+ void initPacketizerState() { }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
@@ -184,8 +172,8 @@
// together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
- if (getSlot(MII) == getSlot(MIJ))
- ConsideredInstUsesAlreadyWrittenVectorElement = true;
+ if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
+ return false;
// Does MII and MIJ share the same pred_sel ?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
@@ -223,20 +211,6 @@
std::vector<R600InstrInfo::BankSwizzle> &BS,
bool &isTransSlot) {
isTransSlot = TII->isTransOnly(MI);
- assert (!isTransSlot || VLIW5);
-
- // Is the dst reg sequence legal ?
- if (!isTransSlot && !CurrentPacketMIs.empty()) {
- if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
- if (ConsideredInstUsesAlreadyWrittenVectorElement &&
- !TII->isVectorOnly(MI) && VLIW5) {
- isTransSlot = true;
- DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
- }
- else
- return false;
- }
- }
// Are the Constants limitations met ?
CurrentPacketMIs.push_back(MI);
@@ -304,8 +278,6 @@
return It;
}
endPacket(MI->getParent(), MI);
- if (TII->isTransOnly(MI))
- return MI;
return VLIWPacketizerList::addToPacket(MI);
}
};