MI-Sched: Model "reserved" processor resources.

This allows a target to use MI-Sched as an in-order scheduler that
will model strict resource conflicts without defining a processor
itinerary. Instead, the target can now use the new per-operand machine
model and define in-order resources with BufferSize=0. For example,
this would allow restricting the type of operations that can be formed
into a dispatch group. (Normally NumMicroOps is sufficient to enforce
dispatch groups).

If the intent is to model latency in in-order pipeline, as opposed to
resource conflicts, then a resource with BufferSize=1 should be
defined instead.

This feature is only casually tested as there are no in-tree targets
using it yet. However, Hal will be experimenting with POWER7.

llvm-svn: 196517
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 3296149..6cfedcb 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1322,6 +1322,8 @@
 // GenericScheduler - Implementation of the generic MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
+static const unsigned InvalidCycle = ~0U;
+
 namespace {
 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance
 /// the schedule.
@@ -1491,6 +1493,10 @@
     // Is the scheduled region resource limited vs. latency limited.
     bool IsResourceLimited;
 
+    // Record the highest cycle at which each resource has been reserved by a
+    // scheduled instruction.
+    SmallVector<unsigned, 16> ReservedCycles;
+
 #ifndef NDEBUG
     // Remember the greatest operand latency as an upper bound on the number of
     // times we should retry the pending queue because of a hazard.
@@ -1518,6 +1524,7 @@
       MaxExecutedResCount = 0;
       ZoneCritResIdx = 0;
       IsResourceLimited = false;
+      ReservedCycles.clear();
 #ifndef NDEBUG
       MaxObservedLatency = 0;
 #endif
@@ -1587,6 +1594,8 @@
     /// cycle.
     unsigned getLatencyStallCycles(SUnit *SU);
 
+    unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+
     bool checkHazard(SUnit *SU);
 
     unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
@@ -1708,8 +1717,10 @@
   DAG = dag;
   SchedModel = smodel;
   Rem = rem;
-  if (SchedModel->hasInstrSchedModel())
+  if (SchedModel->hasInstrSchedModel()) {
     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
+    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+  }
 }
 
 /// Initialize the per-region scheduling policy.
@@ -1890,6 +1901,20 @@
   return 0;
 }
 
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.
+unsigned GenericScheduler::SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[PIdx];
+  // If this resource has never been used, always return cycle zero.
+  if (NextUnreserved == InvalidCycle)
+    return 0;
+  // For bottom-up scheduling add the cycles needed for the current operation.
+  if (!isTop())
+    NextUnreserved += Cycles;
+  return NextUnreserved;
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -1913,6 +1938,15 @@
           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
     return true;
   }
+  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
+    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+    for (TargetSchedModel::ProcResIter
+           PI = SchedModel->getWriteProcResBegin(SC),
+           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+      if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+        return true;
+    }
+  }
   return false;
 }
 
@@ -2097,7 +2131,7 @@
 /// \return the next cycle at which the instruction may execute without
 /// oversubscribing resources.
 unsigned GenericScheduler::SchedBoundary::
-countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
+countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
   unsigned Factor = SchedModel->getResourceFactor(PIdx);
   unsigned Count = Factor * Cycles;
   DEBUG(dbgs() << "  " << getResourceName(PIdx)
@@ -2116,8 +2150,14 @@
           << getResourceName(PIdx) << ": "
           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
   }
-  // TODO: We don't yet model reserved resources. It's not hard though.
-  return CurrCycle;
+  // For reserved resources, record the highest cycle using the resource.
+  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  if (NextAvailable > CurrCycle) {
+    DEBUG(dbgs() << "  Resource conflict: "
+          << SchedModel->getProcResource(PIdx)->Name << " reserved until @"
+          << NextAvailable << "\n");
+  }
+  return NextAvailable;
 }
 
 /// Move the boundary of scheduled code by one SUnit.
@@ -2131,25 +2171,17 @@
     }
     HazardRec->EmitInstruction(SU);
   }
+  // checkHazard should prevent scheduling multiple instructions per cycle that
+  // exceed the issue width.
   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
-  CurrMOps += IncMOps;
-  // checkHazard prevents scheduling multiple instructions per cycle that exceed
-  // issue width. However, we commonly reach the maximum. In this case
-  // opportunistically bump the cycle to avoid uselessly checking everything in
-  // the readyQ. Furthermore, a single instruction may produce more than one
-  // cycle's worth of micro-ops.
-  //
-  // TODO: Also check if this SU must end a dispatch group.
-  unsigned NextCycle = CurrCycle;
-  if (CurrMOps >= SchedModel->getIssueWidth()) {
-    ++NextCycle;
-    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
-          << " at cycle " << CurrCycle << '\n');
-  }
+  assert(CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth() &&
+         "Cannot scheduling this instructions MicroOps in the current cycle.");
+
   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
 
+  unsigned NextCycle = CurrCycle;
   switch (SchedModel->getMicroOpBufferSize()) {
   case 0:
     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
@@ -2194,10 +2226,23 @@
            PI = SchedModel->getWriteProcResBegin(SC),
            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
       unsigned RCycle =
-        countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
+        countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
       if (RCycle > NextCycle)
         NextCycle = RCycle;
     }
+    if (SU->hasReservedResource) {
+      // For reserved resources, record the highest cycle using the resource.
+      // For top-down scheduling, this is the cycle in which we schedule this
+      // instruction plus the number of cycles the operations reserves the
+      // resource. For bottom-up is it simply the instruction's cycle.
+      for (TargetSchedModel::ProcResIter
+             PI = SchedModel->getWriteProcResBegin(SC),
+             PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+        unsigned PIdx = PI->ProcResourceIdx;
+        if (SchedModel->getProcResource(PIdx)->BufferSize == 0)
+          ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
+      }
+    }
   }
   // Update ExpectedLatency and DependentLatency.
   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
@@ -2224,6 +2269,16 @@
       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
       > (int)LFactor;
   }
+  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
+  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
+  // one cycle.  Since we commonly reach the max MOps here, opportunistically
+  // bump the cycle to avoid uselessly checking everything in the readyQ.
+  CurrMOps += IncMOps;
+  while (CurrMOps >= SchedModel->getIssueWidth()) {
+    bumpCycle(++NextCycle);
+    DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
+          << " at cycle " << CurrCycle << '\n');
+  }
   DEBUG(dumpScheduledState());
 }