Add "blocked" heuristic to the Hexagon MI scheduler.

  Improve AQ instruction selection in the Hexagon MI scheduler.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163523 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 6a37639..b131a8f 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -190,6 +190,9 @@
   std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
     unsigned Limit = TRI->getRegPressureSetLimit(i);
+    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
+          << "Limit " << Limit
+          << " Actual " << RegionPressure[i] << "\n");
     if (RegionPressure[i] > Limit)
       RegionCriticalPSets.push_back(PressureElement(i, 0));
   }
@@ -199,11 +202,6 @@
             RegionCriticalPSets[i].PSetID) << " ";
         dbgs() << "\n");
 
-  // Reset resource state.
-  TopResourceModel->resetPacketState();
-  TopResourceModel->resetDFA();
-  BotResourceModel->resetPacketState();
-  BotResourceModel->resetDFA();
   TotalPackets = 0;
 }
 
@@ -264,13 +262,15 @@
 }
 
 /// Keep track of available resources.
-void VLIWResourceModel::reserveResources(SUnit *SU) {
+bool VLIWResourceModel::reserveResources(SUnit *SU) {
+  bool startNewCycle = false;
   // If this SU does not fit in the packet
   // start a new one.
   if (!isResourceAvailable(SU)) {
     ResourcesModel->clearResources();
     Packet.clear();
     TotalPackets++;
+    startNewCycle = true;
   }
 
   switch (SU->getInstr()->getOpcode()) {
@@ -295,7 +295,8 @@
   DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
   for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
     DEBUG(dbgs() << "\t[" << i << "] SU(");
-    DEBUG(dbgs() << Packet[i]->NodeNum << ")\n");
+    DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+    DEBUG(Packet[i]->getInstr()->dump());
   }
 #endif
 
@@ -305,7 +306,10 @@
     ResourcesModel->clearResources();
     Packet.clear();
     TotalPackets++;
+    startNewCycle = true;
   }
+
+  return startNewCycle;
 }
 
 // Release all DAG roots for scheduling.
@@ -352,6 +356,17 @@
   // Initialize top/bottom trackers after computing region pressure.
   initRegPressure();
 
+  // To view Height/Depth correctly, they should be accessed at least once.
+  DEBUG(unsigned maxH = 0;
+        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          if (SUnits[su].getHeight() > maxH)
+            maxH = SUnits[su].getHeight();
+        dbgs() << "Max Height " << maxH << "\n";);
+  DEBUG(unsigned maxD = 0;
+        for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          if (SUnits[su].getDepth() > maxD)
+            maxD = SUnits[su].getDepth();
+        dbgs() << "Max Depth " << maxD << "\n";);
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
 
@@ -390,13 +405,9 @@
       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
       updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
 
-      // Update DFA state.
-      TopResourceModel->reserveResources(SU);
-
       // Release dependent instructions for scheduling.
       releaseSuccessors(SU);
-    }
-    else {
+    } else {
       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
       MachineBasicBlock::iterator priorII =
         priorNonDebug(CurrentBottom, CurrentTop);
@@ -415,9 +426,6 @@
       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
       updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
 
-      // Update DFA state.
-      BotResourceModel->reserveResources(SU);
-
       // Release dependent instructions for scheduling.
       releasePredecessors(SU);
     }
@@ -426,9 +434,6 @@
   }
   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 
-  DEBUG(dbgs() << "Final schedule has " << TopResourceModel->getTotalPackets() +
-        BotResourceModel->getTotalPackets()<< "packets.\n");
-
   placeDebugValues();
 }
 
@@ -465,6 +470,9 @@
   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
 
+  Top.ResourceModel = new VLIWResourceModel(TM);
+  Bot.ResourceModel = new VLIWResourceModel(TM);
+
   assert((!ForceTopDown || !ForceBottomUp) &&
          "-misched-topdown incompatible with -misched-bottomup");
 }
@@ -553,8 +561,7 @@
   if (!HazardRec->isEnabled()) {
     // Bypass HazardRec virtual calls.
     CurrCycle = NextCycle;
-  }
-  else {
+  } else {
     // Bypass getHazardType calls in case of long latency.
     for (; CurrCycle != NextCycle; ++CurrCycle) {
       if (isTop())
@@ -571,6 +578,7 @@
 
 /// Move the boundary of scheduled code by one SUnit.
 void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
+  bool startNewCycle = false;
 
   // Update the reservation table.
   if (HazardRec->isEnabled()) {
@@ -581,13 +589,20 @@
     }
     HazardRec->EmitInstruction(SU);
   }
+
+  // Update DFA model.
+  startNewCycle = ResourceModel->reserveResources(SU);
+
   // Check the instruction group dispatch limit.
   // TODO: Check if this SU must end a dispatch group.
   IssueCount += DAG->getNumMicroOps(SU->getInstr());
-  if (IssueCount >= DAG->getIssueWidth()) {
+  if (startNewCycle) {
     DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
     bumpCycle();
   }
+  else
+    DEBUG(dbgs() << "*** IssueCount " << IssueCount
+          << " at cycle " << CurrCycle << '\n');
 }
 
 /// Release pending ready nodes in to the available queue. This makes them
@@ -648,8 +663,9 @@
 }
 
 #ifndef NDEBUG
-void ConvergingVLIWScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
-                                         SUnit *SU, PressureElement P) {
+void ConvergingVLIWScheduler::traceCandidate(const char *Label,
+                                             const ReadyQueue &Q,
+                                             SUnit *SU, PressureElement P) {
   dbgs() << Label << " " << Q.getName() << " ";
   if (P.isValid())
     dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
@@ -660,10 +676,48 @@
 }
 #endif
 
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledPred(SUnit *SU) {
+  SUnit *OnlyAvailablePred = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    SUnit &Pred = *I->getSUnit();
+    if (!Pred.isScheduled) {
+      // We found an available, but not scheduled, predecessor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+        return 0;
+      OnlyAvailablePred = &Pred;
+    }
+  }
+  return OnlyAvailablePred;
+}
+
+/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
+/// of SU, return it, otherwise return null.
+static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
+  SUnit *OnlyAvailableSucc = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    SUnit &Succ = *I->getSUnit();
+    if (!Succ.isScheduled) {
+      // We found an available, but not scheduled, successor.  If it's the
+      // only one we have found, keep track of it... otherwise give up.
+      if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
+        return 0;
+      OnlyAvailableSucc = &Succ;
+    }
+  }
+  return OnlyAvailableSucc;
+}
+
 // Constants used to denote relative importance of
 // heuristic components for cost computation.
 static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
 static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 20;
 static const unsigned ScaleTwo = 10;
 static const unsigned FactorOne = 2;
 
@@ -685,19 +739,44 @@
     ResCount += PriorityOne;
 
   // Critical path first.
-  if (Q.getID() == TopQID)
+  if (Q.getID() == TopQID) {
     ResCount += (SU->getHeight() * ScaleTwo);
-  else
+
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (Top.ResourceModel->isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+  } else {
     ResCount += (SU->getDepth() * ScaleTwo);
 
-  // If resources are available for it, multiply the
-  // chance of scheduling.
-  if (DAG->getTopResourceModel()->isResourceAvailable(SU))
-    ResCount <<= FactorOne;
+    // If resources are available for it, multiply the
+    // chance of scheduling.
+    if (Bot.ResourceModel->isResourceAvailable(SU))
+      ResCount <<= FactorOne;
+  }
+
+  unsigned NumNodesBlocking = 0;
+  if (Q.getID() == TopQID) {
+    // How many SUs does it block from scheduling?
+    // Look at all of the successors of this node.
+    // Count the number of nodes that
+    // this node is the sole unscheduled node for.
+    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+         I != E; ++I)
+      if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+        ++NumNodesBlocking;
+  } else {
+    // How many unscheduled predecessors block this node?
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I)
+      if (getSingleUnscheduledSucc(I->getSUnit()) == SU)
+        ++NumNodesBlocking;
+  }
+  ResCount += (NumNodesBlocking * ScaleTwo);
 
   // Factor in reg pressure as a heuristic.
-  ResCount -= (Delta.Excess.UnitIncrease * PriorityThree);
-  ResCount -= (Delta.CriticalMax.UnitIncrease * PriorityThree);
+  ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
+  ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
 
   DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
 
@@ -736,7 +815,6 @@
       continue;
     }
 
-
     // Best cost.
     if (CurrentCost > Candidate.SCost) {
       DEBUG(traceCandidate("CCAND", Q, *I));
@@ -859,14 +937,14 @@
 }
 
 /// Update the scheduler's state after scheduling a node. This is the same node
-/// that was just returned by pickNode(). However, VLIWMachineScheduler needs to update
-/// it's state based on the current cycle before MachineSchedStrategy does.
+/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
+/// to update it's state based on the current cycle before MachineSchedStrategy
+/// does.
 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = Top.CurrCycle;
     Top.bumpNode(SU);
-  }
-  else {
+  } else {
     SU->BotReadyCycle = Bot.CurrCycle;
     Bot.bumpNode(SU);
   }