Revert "R600: Non vector only instruction can be scheduled on trans unit" This reverts commit 98ce62780ea7185ba710868bf83c8077e8d7f6d6. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187526 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 6b3f6a744a6d16c5d62dc3477186035e8a74a8e9 [log] [tgz]
author: Tom Stellard <thomas.stellard@amd.com> Wed Jul 31 20:43:27 2013 +0000
committer: Tom Stellard <thomas.stellard@amd.com> Wed Jul 31 20:43:27 2013 +0000
tree: 1ace9471553c04326bc676bbc6b2cea18352a250
parent: 5519dc9de88b36a2250db0faaf78c55f5e2c4d00 [diff]
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 2fd9300..2e9b732 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp

@@ -459,9 +459,6 @@
 isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                   const std::vector<std::pair<int, unsigned> > &TransOps,
                   unsigned ConstCount) {
-  // TransALU can't read 3 constants
-  if (ConstCount > 2)
-    return false;
   for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
     const std::pair<int, unsigned> &Src = TransOps[i];
     unsigned Cycle = getTransSwizzle(TransSwz, i);

diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 0499dd5..0dc0365 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp

@@ -9,6 +9,7 @@
 //
 /// \file
 /// \brief R600 Machine Scheduler interface
+// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
 //
 //===----------------------------------------------------------------------===//
 
@@ -28,7 +29,6 @@
   DAG = dag;
   TII = static_cast<const R600InstrInfo*>(DAG->TII);
   TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
-  VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
   MRI = &DAG->MRI;
   CurInstKind = IDOther;
   CurEmitted = 0;
@@ -342,16 +342,14 @@
   }
 }
 
-SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
   if (Q.empty())
     return NULL;
   for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
       It != E; ++It) {
     SUnit *SU = *It;
     InstructionsGroupCandidate.push_back(SU->getInstr());
-    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
-        && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
-    ) {
+    if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
       InstructionsGroupCandidate.pop_back();
       Q.erase((It + 1).base());
       return SU;
@@ -375,8 +373,6 @@
   DEBUG(dbgs() << "New Slot\n");
   assert (OccupedSlotsMask && "Slot wasn't filled");
   OccupedSlotsMask = 0;
-//  if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
-//    OccupedSlotsMask |= 16;
   InstructionsGroupCandidate.clear();
   LoadAlu();
 }
@@ -413,12 +409,12 @@
   }
 }
 
-SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
   static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
-  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
+  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
   if (SlotedSU)
     return SlotedSU;
-  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
+  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
   if (UnslotedSU)
     AssignSlot(UnslotedSU->getInstr(), Slot);
   return UnslotedSU;
@@ -438,35 +434,30 @@
       // Bottom up scheduling : predX must comes first
       if (!AvailableAlus[AluPredX].empty()) {
         OccupedSlotsMask |= 31;
-        return PopInst(AvailableAlus[AluPredX], false);
+        return PopInst(AvailableAlus[AluPredX]);
       }
       // Flush physical reg copies (RA will discard them)
       if (!AvailableAlus[AluDiscarded].empty()) {
         OccupedSlotsMask |= 31;
-        return PopInst(AvailableAlus[AluDiscarded], false);
+        return PopInst(AvailableAlus[AluDiscarded]);
       }
       // If there is a T_XYZW alu available, use it
       if (!AvailableAlus[AluT_XYZW].empty()) {
         OccupedSlotsMask |= 15;
-        return PopInst(AvailableAlus[AluT_XYZW], false);
+        return PopInst(AvailableAlus[AluT_XYZW]);
       }
     }
     bool TransSlotOccuped = OccupedSlotsMask & 16;
-    if (!TransSlotOccuped && VLIW5) {
+    if (!TransSlotOccuped) {
       if (!AvailableAlus[AluTrans].empty()) {
         OccupedSlotsMask |= 16;
-        return PopInst(AvailableAlus[AluTrans], false);
-      }
-      SUnit *SU = AttemptFillSlot(3, true);
-      if (SU) {
-        OccupedSlotsMask |= 16;
-        return SU;
+        return PopInst(AvailableAlus[AluTrans]);
       }
     }
     for (int Chan = 3; Chan > -1; --Chan) {
       bool isOccupied = OccupedSlotsMask & (1 << Chan);
       if (!isOccupied) {
-        SUnit *SU = AttemptFillSlot(Chan, false);
+        SUnit *SU = AttemptFillSlot(Chan);
         if (SU) {
           OccupedSlotsMask |= (1 << Chan);
           InstructionsGroupCandidate.push_back(SU->getInstr());

diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index 0a6f120..f8965d8 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h

@@ -84,16 +84,15 @@
 
 private:
   std::vector<MachineInstr *> InstructionsGroupCandidate;
-  bool VLIW5;
 
   int getInstKind(SUnit *SU);
   bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
   AluKind getAluKind(SUnit *SU) const;
   void LoadAlu();
   unsigned AvailablesAluCount() const;
-  SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
+  SUnit *AttemptFillSlot (unsigned Slot);
   void PrepareNextSlot();
-  SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
+  SUnit *PopInst(std::vector<SUnit*> &Q);
 
   void AssignSlot(MachineInstr *MI, unsigned Slot);
   SUnit* pickAlu();

diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 6c70052..5cf1fd3 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp

@@ -58,8 +58,6 @@
 private:
   const R600InstrInfo *TII;
   const R600RegisterInfo &TRI;
-  bool VLIW5;
-  bool ConsideredInstUsesAlreadyWrittenVectorElement;
 
   unsigned getSlot(const MachineInstr *MI) const {
     return TRI.getHWRegChan(MI->getOperand(0).getReg());
@@ -76,13 +74,7 @@
     MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
     if (I->isBundle())
       BI++;
-    int LastDstChan = -1;
     do {
-      bool isTrans = false;
-      int BISlot = getSlot(BI);
-      if (LastDstChan >= BISlot)
-        isTrans = true;
-      LastDstChan = BISlot;
       if (TII->isPredicated(BI))
         continue;
       int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
@@ -93,7 +85,7 @@
         continue;
       }
       unsigned Dst = BI->getOperand(DstIdx).getReg();
-      if (isTrans || TII->isTransOnly(BI)) {
+      if (TII->isTransOnly(BI)) {
         Result[Dst] = AMDGPU::PS;
         continue;
       }
@@ -150,14 +142,10 @@
                         MachineDominatorTree &MDT)
   : VLIWPacketizerList(MF, MLI, MDT, true),
     TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
-    TRI(TII->getRegisterInfo()) {
-    VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
-  }
+    TRI(TII->getRegisterInfo()) { }
 
   // initPacketizerState - initialize some internal flags.
-  void initPacketizerState() {
-    ConsideredInstUsesAlreadyWrittenVectorElement = false;
-  }
+  void initPacketizerState() { }
 
   // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
   bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
@@ -184,8 +172,8 @@
   // together.
   bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
     MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
-    if (getSlot(MII) == getSlot(MIJ))
-      ConsideredInstUsesAlreadyWrittenVectorElement = true;
+    if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
+      return false;
     // Does MII and MIJ share the same pred_sel ?
     int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
         OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
@@ -223,20 +211,6 @@
                                  std::vector<R600InstrInfo::BankSwizzle> &BS,
                                  bool &isTransSlot) {
     isTransSlot = TII->isTransOnly(MI);
-    assert (!isTransSlot || VLIW5);
-
-    // Is the dst reg sequence legal ?
-    if (!isTransSlot && !CurrentPacketMIs.empty()) {
-      if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
-        if (ConsideredInstUsesAlreadyWrittenVectorElement  &&
-            !TII->isVectorOnly(MI) && VLIW5) {
-          isTransSlot = true;
-          DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
-        }
-        else
-          return false;
-      }
-    }
 
     // Are the Constants limitations met ?
     CurrentPacketMIs.push_back(MI);
@@ -304,8 +278,6 @@
       return It;
     }
     endPacket(MI->getParent(), MI);
-    if (TII->isTransOnly(MI))
-      return MI;
     return VLIWPacketizerList::addToPacket(MI);
   }
 };
commit	6b3f6a744a6d16c5d62dc3477186035e8a74a8e9	[log] [tgz]
author	Tom Stellard <thomas.stellard@amd.com>	Wed Jul 31 20:43:27 2013 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	Wed Jul 31 20:43:27 2013 +0000
tree	1ace9471553c04326bc676bbc6b2cea18352a250
parent	5519dc9de88b36a2250db0faaf78c55f5e2c4d00 [diff]