Fix some register-alias-related bugs in the post-RA scheduler liveness
computation code. Also, avoid adding output-depenency edges when both
defs are dead, which frequently happens with EFLAGS defs.

Compute Depth and Height lazily, and always in terms of edge latency
values. For the schedulers that don't care about latency, edge latencies
are set to 1.

Eliminate Cycle and CycleBound, and LatencyPriorityQueue's Latencies array.
These are all subsumed by the Depth and Height fields.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61073 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 2abbf36..7ad9ac9 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -41,47 +41,6 @@
 }
 
 
-/// CalcNodePriority - Calculate the maximal path from the node to the exit.
-///
-void LatencyPriorityQueue::CalcLatency(const SUnit &SU) {
-  int &Latency = Latencies[SU.NodeNum];
-  if (Latency != -1)
-    return;
-
-  std::vector<const SUnit*> WorkList;
-  WorkList.push_back(&SU);
-  while (!WorkList.empty()) {
-    const SUnit *Cur = WorkList.back();
-    bool AllDone = true;
-    unsigned MaxSuccLatency = 0;
-    for (SUnit::const_succ_iterator I = Cur->Succs.begin(),E = Cur->Succs.end();
-         I != E; ++I) {
-      int SuccLatency = Latencies[I->getSUnit()->NodeNum];
-      if (SuccLatency == -1) {
-        AllDone = false;
-        WorkList.push_back(I->getSUnit());
-      } else {
-        unsigned NewLatency = SuccLatency + I->getLatency();
-        MaxSuccLatency = std::max(MaxSuccLatency, NewLatency);
-      }
-    }
-    if (AllDone) {
-      Latencies[Cur->NodeNum] = MaxSuccLatency;
-      WorkList.pop_back();
-    }
-  }
-}
-
-/// CalculatePriorities - Calculate priorities of all scheduling units.
-void LatencyPriorityQueue::CalculatePriorities() {
-  Latencies.assign(SUnits->size(), -1);
-  NumNodesSolelyBlocking.assign(SUnits->size(), 0);
-
-  // For each node, calculate the maximal path from the node to the exit.
-  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
-    CalcLatency((*SUnits)[i]);
-}
-
 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
 /// of SU, return it, otherwise return null.
 SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 2f7c011..0fc6c33 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,9 @@
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -49,6 +51,14 @@
     static char ID;
     PostRAScheduler() : MachineFunctionPass(&ID) {}
 
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
     const char *getPassName() const {
       return "Post RA top-down list latency scheduler";
     }
@@ -72,8 +82,10 @@
     ScheduleDAGTopologicalSort Topo;
 
   public:
-    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm)
-      : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {}
+    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm,
+                         const MachineLoopInfo &MLI,
+                         const MachineDominatorTree &MDT)
+      : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {}
 
     void Schedule();
 
@@ -88,11 +100,14 @@
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   DOUT << "PostRAScheduler\n";
 
+  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
   // Loop over all of the basic blocks
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
        MBB != MBBe; ++MBB) {
 
-    SchedulePostRATDList Scheduler(MBB, Fn.getTarget());
+    SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT);
 
     Scheduler.Run();
 
@@ -142,6 +157,28 @@
   return TRI->getRegClass(II.OpInfo[Op].RegClass);
 }
 
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+  SDep *Next = 0;
+  unsigned NextDepth = 0;
+  // Find the predecessor edge with the greatest depth.
+  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+       P != PE; ++P) {
+    SUnit *PredSU = P->getSUnit();
+    unsigned PredLatency = P->getLatency();
+    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+    // In the case of a latency tie, prefer an anti-dependency edge over
+    // other types of edges.
+    if (NextDepth < PredTotalLatency ||
+        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+      NextDepth = PredTotalLatency;
+      Next = &*P;
+    }
+  }
+  return Next;
+}
+
 /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
 /// of the ScheduleDAG and break them by renaming registers.
 ///
@@ -150,34 +187,16 @@
   // so just duck out immediately if the block is empty.
   if (BB->empty()) return false;
 
-  Topo.InitDAGTopologicalSorting();
-
-  // Compute a critical path for the DAG.
+  // Find the node at the bottom of the critical path.
   SUnit *Max = 0;
-  std::vector<SDep *> CriticalPath(SUnits.size());
-  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
-       E = Topo.end(); I != E; ++I) {
-    SUnit *SU = &SUnits[*I];
-    for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
-         P != PE; ++P) {
-      SUnit *PredSU = P->getSUnit();
-      // This assumes that there's no delay for reusing registers.
-      unsigned PredLatency = P->getLatency();
-      unsigned PredTotalLatency = PredSU->CycleBound + PredLatency;
-      if (SU->CycleBound < PredTotalLatency ||
-          (SU->CycleBound == PredTotalLatency &&
-           P->getKind() == SDep::Anti)) {
-        SU->CycleBound = PredTotalLatency;
-        CriticalPath[*I] = &*P;
-      }
-    }
-    // Keep track of the node at the end of the critical path.
-    if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency)
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
       Max = SU;
   }
 
   DOUT << "Critical path has total latency "
-       << (Max ? Max->CycleBound + Max->Latency : 0) << "\n";
+       << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";
 
   // Walk the critical path from the bottom up. Collect all anti-dependence
   // edges on the critical path. Skip anti-dependencies between SUnits that
@@ -195,9 +214,9 @@
   // the anti-dependencies in an instruction in order to be effective.
   BitVector AllocatableSet = TRI->getAllocatableSet(*MF);
   DenseMap<MachineInstr *, unsigned> CriticalAntiDeps;
-  for (SUnit *SU = Max; CriticalPath[SU->NodeNum];
-       SU = CriticalPath[SU->NodeNum]->getSUnit()) {
-    SDep *Edge = CriticalPath[SU->NodeNum];
+  SUnit *SU = Max;
+  for (SDep *Edge = CriticalPathStep(SU); Edge;
+       Edge = CriticalPathStep(SU = Edge->getSUnit())) {
     SUnit *NextSU = Edge->getSUnit();
     // Only consider anti-dependence edges.
     if (Edge->getKind() != SDep::Anti)
@@ -494,6 +513,11 @@
         Classes[SubregReg] = 0;
         RegRefs.erase(SubregReg);
       }
+      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+           *Super; ++Super) {
+        unsigned SuperReg = *Super;
+        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
     }
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
@@ -556,8 +580,7 @@
   // Compute how many cycles it will be before this actually becomes
   // available.  This is the max of the start time of all predecessors plus
   // their latencies.
-  unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency();
-  SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
+  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
   
   if (SuccSU->NumPredsLeft == 0) {
     PendingQueue.push_back(SuccSU);
@@ -572,7 +595,8 @@
   DEBUG(SU->dump(this));
   
   Sequence.push_back(SU);
-  SU->Cycle = CurCycle;
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
 
   // Top down: release successors.
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -603,21 +627,21 @@
   while (!AvailableQueue.empty() || !PendingQueue.empty()) {
     // Check to see if any of the pending instructions are ready to issue.  If
     // so, add them to the available queue.
+    unsigned MinDepth = ~0u;
     for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
-      if (PendingQueue[i]->CycleBound == CurCycle) {
+      if (PendingQueue[i]->getDepth() <= CurCycle) {
         AvailableQueue.push(PendingQueue[i]);
         PendingQueue[i]->isAvailable = true;
         PendingQueue[i] = PendingQueue.back();
         PendingQueue.pop_back();
         --i; --e;
-      } else {
-        assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?");
-      }
+      } else if (PendingQueue[i]->getDepth() < MinDepth)
+        MinDepth = PendingQueue[i]->getDepth();
     }
     
     // If there are no instructions available, don't try to issue anything.
     if (AvailableQueue.empty()) {
-      ++CurCycle;
+      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
       continue;
     }
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 7ceab3a..dad5eb5 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -33,115 +33,6 @@
 
 ScheduleDAG::~ScheduleDAG() {}
 
-/// CalculateDepths - compute depths using algorithms for the longest
-/// paths in the DAG
-void ScheduleDAG::CalculateDepths() {
-  unsigned DAGSize = SUnits.size();
-  std::vector<SUnit*> WorkList;
-  WorkList.reserve(DAGSize);
-
-  // Initialize the data structures
-  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
-    SUnit *SU = &SUnits[i];
-    unsigned Degree = SU->Preds.size();
-    // Temporarily use the Depth field as scratch space for the degree count.
-    SU->Depth = Degree;
-
-    // Is it a node without dependencies?
-    if (Degree == 0) {
-      assert(SU->Preds.empty() && "SUnit should have no predecessors");
-      // Collect leaf nodes
-      WorkList.push_back(SU);
-    }
-  }
-
-  // Process nodes in the topological order
-  while (!WorkList.empty()) {
-    SUnit *SU = WorkList.back();
-    WorkList.pop_back();
-    unsigned SUDepth = 0;
-
-    // Use dynamic programming:
-    // When current node is being processed, all of its dependencies
-    // are already processed.
-    // So, just iterate over all predecessors and take the longest path
-    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-         I != E; ++I) {
-      unsigned PredDepth = I->getSUnit()->Depth;
-      if (PredDepth+1 > SUDepth) {
-        SUDepth = PredDepth + 1;
-      }
-    }
-
-    SU->Depth = SUDepth;
-
-    // Update degrees of all nodes depending on current SUnit
-    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-         I != E; ++I) {
-      SUnit *SU = I->getSUnit();
-      if (!--SU->Depth)
-        // If all dependencies of the node are processed already,
-        // then the longest path for the node can be computed now
-        WorkList.push_back(SU);
-    }
-  }
-}
-
-/// CalculateHeights - compute heights using algorithms for the longest
-/// paths in the DAG
-void ScheduleDAG::CalculateHeights() {
-  unsigned DAGSize = SUnits.size();
-  std::vector<SUnit*> WorkList;
-  WorkList.reserve(DAGSize);
-
-  // Initialize the data structures
-  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
-    SUnit *SU = &SUnits[i];
-    unsigned Degree = SU->Succs.size();
-    // Temporarily use the Height field as scratch space for the degree count.
-    SU->Height = Degree;
-
-    // Is it a node without dependencies?
-    if (Degree == 0) {
-      assert(SU->Succs.empty() && "Something wrong");
-      assert(WorkList.empty() && "Should be empty");
-      // Collect leaf nodes
-      WorkList.push_back(SU);
-    }
-  }
-
-  // Process nodes in the topological order
-  while (!WorkList.empty()) {
-    SUnit *SU = WorkList.back();
-    WorkList.pop_back();
-    unsigned SUHeight = 0;
-
-    // Use dynamic programming:
-    // When current node is being processed, all of its dependencies
-    // are already processed.
-    // So, just iterate over all successors and take the longest path
-    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-         I != E; ++I) {
-      unsigned SuccHeight = I->getSUnit()->Height;
-      if (SuccHeight+1 > SUHeight) {
-        SUHeight = SuccHeight + 1;
-      }
-    }
-
-    SU->Height = SUHeight;
-
-    // Update degrees of all nodes depending on current SUnit
-    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-         I != E; ++I) {
-      SUnit *SU = I->getSUnit();
-      if (!--SU->Height)
-        // If all dependencies of the node are processed already,
-        // then the longest path for the node can be computed now
-        WorkList.push_back(SU);
-    }
-  }
-}
-
 /// dump - dump the schedule.
 void ScheduleDAG::dumpSchedule() const {
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -171,13 +62,10 @@
   for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
     if (Preds[i] == D)
       return;
-  // Add a pred to this SUnit.
-  Preds.push_back(D);
   // Now add a corresponding succ to N.
   SDep P = D;
   P.setSUnit(this);
   SUnit *N = D.getSUnit();
-  N->Succs.push_back(P);
   // Update the bookkeeping.
   if (D.getKind() == SDep::Data) {
     ++NumPreds;
@@ -187,6 +75,10 @@
     ++NumPredsLeft;
   if (!isScheduled)
     ++N->NumSuccsLeft;
+  N->Succs.push_back(P);
+  Preds.push_back(D);
+  this->setDepthDirty();
+  N->setHeightDirty();
 }
 
 /// removePred - This removes the specified edge as a pred of the current
@@ -220,10 +112,128 @@
         --NumPredsLeft;
       if (!isScheduled)
         --N->NumSuccsLeft;
+      this->setDepthDirty();
+      N->setHeightDirty();
       return;
     }
 }
 
+void SUnit::setDepthDirty() {
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    if (!SU->isDepthCurrent) continue;
+    SU->isDepthCurrent = false;
+    for (SUnit::const_succ_iterator I = Succs.begin(),
+         E = Succs.end(); I != E; ++I)
+      WorkList.push_back(I->getSUnit());
+  }
+}
+
+void SUnit::setHeightDirty() {
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    if (!SU->isHeightCurrent) continue;
+    SU->isHeightCurrent = false;
+    for (SUnit::const_pred_iterator I = Preds.begin(),
+         E = Preds.end(); I != E; ++I)
+      WorkList.push_back(I->getSUnit());
+  }
+}
+
+/// setDepthToAtLeast - Update this node's successors to reflect the
+/// fact that this node's depth just increased.
+///
+void SUnit::setDepthToAtLeast(unsigned NewDepth) {
+  if (NewDepth <= Depth)
+    return;
+  setDepthDirty();
+  Depth = NewDepth;
+  isDepthCurrent = true;
+}
+
+/// setHeightToAtLeast - Update this node's predecessors to reflect the
+/// fact that this node's height just increased.
+///
+void SUnit::setHeightToAtLeast(unsigned NewHeight) {
+  if (NewHeight <= Height)
+    return;
+  setHeightDirty();
+  Height = NewHeight;
+  isHeightCurrent = true;
+}
+
+/// ComputeDepth - Calculate the maximal path from the node to the exit.
+///
+void SUnit::ComputeDepth() {
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  while (!WorkList.empty()) {
+    SUnit *Cur = WorkList.back();
+
+    bool Done = true;
+    unsigned MaxPredDepth = 0;
+    for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
+         E = Cur->Preds.end(); I != E; ++I) {
+      SUnit *PredSU = I->getSUnit();
+      if (PredSU->isDepthCurrent)
+        MaxPredDepth = std::max(MaxPredDepth,
+                                PredSU->Depth + I->getLatency());
+      else {
+        Done = false;
+        WorkList.push_back(PredSU);
+      }
+    }
+
+    if (Done) {
+      WorkList.pop_back();
+      if (MaxPredDepth != Cur->Depth) {
+        Cur->setDepthDirty();
+        Cur->Depth = MaxPredDepth;
+      }
+      Cur->isDepthCurrent = true;
+    }
+  }
+}
+
+/// ComputeHeight - Calculate the maximal path from the node to the entry.
+///
+void SUnit::ComputeHeight() {
+  SmallVector<SUnit*, 8> WorkList;
+  WorkList.push_back(this);
+  while (!WorkList.empty()) {
+    SUnit *Cur = WorkList.back();
+
+    bool Done = true;
+    unsigned MaxSuccHeight = 0;
+    for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
+         E = Cur->Succs.end(); I != E; ++I) {
+      SUnit *SuccSU = I->getSUnit();
+      if (SuccSU->isHeightCurrent)
+        MaxSuccHeight = std::max(MaxSuccHeight,
+                                 SuccSU->Height + I->getLatency());
+      else {
+        Done = false;
+        WorkList.push_back(SuccSU);
+      }
+    }
+
+    if (Done) {
+      WorkList.pop_back();
+      if (MaxSuccHeight != Cur->Height) {
+        Cur->setHeightDirty();
+        Cur->Height = MaxSuccHeight;
+      }
+      Cur->isHeightCurrent = true;
+    }
+  }
+}
+
 /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
 /// a group of nodes flagged together.
 void SUnit::dump(const ScheduleDAG *G) const {
@@ -299,11 +309,14 @@
       cerr << "has not been scheduled!\n";
       AnyNotSched = true;
     }
-    if (SUnits[i].isScheduled && SUnits[i].Cycle > (unsigned)INT_MAX) {
+    if (SUnits[i].isScheduled &&
+        (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) >
+          unsigned(INT_MAX)) {
       if (!AnyNotSched)
         cerr << "*** Scheduling failed! ***\n";
       SUnits[i].dump(this);
-      cerr << "has an unexpected Cycle value!\n";
+      cerr << "has an unexpected "
+           << (isBottomUp ? "Height" : "Depth") << " value!\n";
       AnyNotSched = true;
     }
     if (isBottomUp) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index fa42d39..cfa639e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,19 +13,27 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "sched-instrs"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
 #include <map>
 using namespace llvm;
 
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
-                                     const TargetMachine &tm)
-  : ScheduleDAG(0, bb, tm) {}
+                                     const TargetMachine &tm,
+                                     const MachineLoopInfo &mli,
+                                     const MachineDominatorTree &mdt)
+  : ScheduleDAG(0, bb, tm), MLI(mli), MDT(mdt) {}
 
 void ScheduleDAGInstrs::BuildSchedUnits() {
   SUnits.clear();
@@ -35,7 +43,7 @@
   // to top.
 
   // Remember where defs and uses of each physical register are as we procede.
-  SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
+  std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
   std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {};
 
   // Remember where unknown loads are after the most recent unknown store
@@ -57,13 +65,20 @@
   // all the work of the block is done before the terminator.
   SUnit *Terminator = 0;
 
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
   for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
        MII != MIE; --MII) {
     MachineInstr *MI = prior(MII);
+    const TargetInstrDesc &TID = MI->getDesc();
     SUnit *SU = NewSUnit(MI);
 
     // Assign the Latency field of SU using target-provided information.
-    ComputeLatency(SU);
+    if (UnitLatencies)
+      SU->Latency = 1;
+    else
+      ComputeLatency(SU);
 
     // Add register-based dependencies (data, anti, and output).
     for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -74,33 +89,51 @@
 
       assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
       std::vector<SUnit *> &UseList = Uses[Reg];
-      SUnit *&Def = Defs[Reg];
+      std::vector<SUnit *> &DefList = Defs[Reg];
       // Optionally add output and anti dependencies.
       // TODO: Using a latency of 1 here assumes there's no cost for
       //       reusing registers.
       SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
-      if (Def && Def != SU)
-        Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+        SUnit *DefSU = DefList[i];
+        if (DefSU != SU &&
+            (Kind != SDep::Output || !MO.isDead() ||
+             !DefSU->getInstr()->registerDefIsDead(Reg)))
+          DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg));
+      }
       for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        SUnit *&Def = Defs[*Alias];
-        if (Def && Def != SU)
-          Def->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+        std::vector<SUnit *> &DefList = Defs[*Alias];
+        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+          SUnit *DefSU = DefList[i];
+          if (DefSU != SU &&
+              (Kind != SDep::Output || !MO.isDead() ||
+               !DefSU->getInstr()->registerDefIsDead(Reg)))
+            DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias));
+        }
       }
 
       if (MO.isDef()) {
         // Add any data dependencies.
-        for (unsigned i = 0, e = UseList.size(); i != e; ++i)
-          if (UseList[i] != SU)
-            UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, Reg));
+        unsigned DataLatency = SU->Latency;
+        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+          SUnit *UseSU = UseList[i];
+          if (UseSU != SU) {
+            UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg));
+          }
+        }
         for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
           std::vector<SUnit *> &UseList = Uses[*Alias];
-          for (unsigned i = 0, e = UseList.size(); i != e; ++i)
-            if (UseList[i] != SU)
-              UseList[i]->addPred(SDep(SU, SDep::Data, SU->Latency, *Alias));
+          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+            SUnit *UseSU = UseList[i];
+            if (UseSU != SU)
+              UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias));
+          }
         }
 
         UseList.clear();
-        Def = SU;
+        if (!MO.isDead())
+          DefList.clear();
+        DefList.push_back(SU);
       } else {
         UseList.push_back(SU);
       }
@@ -111,7 +144,6 @@
     // after stack slots are lowered to actual addresses.
     // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
     // produce more precise dependence information.
-    const TargetInstrDesc &TID = MI->getDesc();
     if (TID.isCall() || TID.isReturn() || TID.isBranch() ||
         TID.hasUnmodeledSideEffects()) {
     new_chain:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 6ac608f..d917386 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -99,6 +99,9 @@
                                   SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
   void ListScheduleBottomUp();
+
+  /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
+  bool ForceUnitLatencies() const { return true; }
 };
 }  // end anonymous namespace
 
@@ -153,7 +156,8 @@
   DOUT << "*** Scheduling [" << CurCycle << "]: ";
   DEBUG(SU->dump(this));
 
-  SU->Cycle = CurCycle;
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
   Sequence.push_back(SU);
 
   // Bottom up: release predecessors
@@ -177,7 +181,7 @@
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     if (I->isAssignedRegDep()) {
-      if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
         assert(LiveRegDefs[I->getReg()] == SU &&
                "Physical register dependency violated?");
@@ -247,9 +251,6 @@
     }
     if (TID.isCommutable())
       NewSU->isCommutable = true;
-    // FIXME: Calculate height / depth and propagate the changes?
-    NewSU->Depth = SU->Depth;
-    NewSU->Height = SU->Height;
 
     // LoadNode may already exist. This can happen when there is another
     // load from the same location and producing the same type of value
@@ -262,9 +263,6 @@
     } else {
       LoadSU = NewSUnit(LoadNode);
       LoadNode->setNodeId(LoadSU->NodeNum);
-
-      LoadSU->Depth = SU->Depth;
-      LoadSU->Height = SU->Height;
     }
 
     SDep ChainPred;
@@ -344,10 +342,8 @@
   // New SUnit has the exact same predecessors.
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I)
-    if (!I->isArtificial()) {
+    if (!I->isArtificial())
       AddPred(NewSU, *I);
-      NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
-    }
 
   // Only copy scheduled successors. Cut them from old node's successor
   // list and move them over.
@@ -358,7 +354,6 @@
       continue;
     SUnit *SuccSU = I->getSUnit();
     if (SuccSU->isScheduled) {
-      NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
       SDep D = *I;
       D.setSUnit(NewSU);
       AddPred(SuccSU, D);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index b01700e..d42c8d88 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -120,10 +120,7 @@
   }
 #endif
   
-  // Compute the cycle when this SUnit actually becomes available.  This
-  // is the max of the start time of all predecessors plus their latencies.
-  unsigned PredDoneCycle = SU->Cycle + SU->Latency;
-  SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
+  SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
   
   if (SuccSU->NumPredsLeft == 0) {
     PendingQueue.push_back(SuccSU);
@@ -138,7 +135,8 @@
   DEBUG(SU->dump(this));
   
   Sequence.push_back(SU);
-  SU->Cycle = CurCycle;
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
 
   // Top down: release successors.
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -171,14 +169,14 @@
     // Check to see if any of the pending instructions are ready to issue.  If
     // so, add them to the available queue.
     for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
-      if (PendingQueue[i]->CycleBound == CurCycle) {
+      if (PendingQueue[i]->getDepth() == CurCycle) {
         AvailableQueue->push(PendingQueue[i]);
         PendingQueue[i]->isAvailable = true;
         PendingQueue[i] = PendingQueue.back();
         PendingQueue.pop_back();
         --i; --e;
       } else {
-        assert(PendingQueue[i]->CycleBound > CurCycle && "Negative latency?");
+        assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
       }
     }
     
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 49a5ac1..c3b6137 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -154,6 +154,10 @@
       Topo.InitDAGTopologicalSorting();
     return NewNode;
   }
+
+  /// ForceUnitLatencies - Return true, since register-pressure-reducing
+  /// scheduling doesn't need actual latency information.
+  bool ForceUnitLatencies() const { return true; }
 };
 }  // end anonymous namespace
 
@@ -171,8 +175,6 @@
 
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
-  CalculateDepths();
-  CalculateHeights();
   Topo.InitDAGTopologicalSorting();
 
   AvailableQueue->initNodes(SUnits);
@@ -272,7 +274,8 @@
   DOUT << "*** Scheduling [" << CurCycle << "]: ";
   DEBUG(SU->dump(this));
 
-  SU->Cycle = CurCycle;
+  assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+  SU->setHeightToAtLeast(CurCycle);
   Sequence.push_back(SU);
 
   // Bottom up: release predecessors
@@ -296,7 +299,7 @@
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     if (I->isAssignedRegDep()) {
-      if (LiveRegCycles[I->getReg()] == I->getSUnit()->Cycle) {
+      if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
         assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
         assert(LiveRegDefs[I->getReg()] == SU &&
                "Physical register dependency violated?");
@@ -328,7 +331,7 @@
 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
 /// its predecessor states to reflect the change.
 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
-  DOUT << "*** Unscheduling [" << SU->Cycle << "]: ";
+  DOUT << "*** Unscheduling [" << SU->getHeight() << "]: ";
   DEBUG(SU->dump(this));
 
   AvailableQueue->UnscheduledNode(SU);
@@ -336,7 +339,7 @@
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     CapturePred(&*I);
-    if (I->isAssignedRegDep() && SU->Cycle == LiveRegCycles[I->getReg()]) {
+    if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
       assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
              "Physical register dependency violated?");
@@ -353,12 +356,12 @@
         LiveRegDefs[I->getReg()] = SU;
         ++NumLiveRegs;
       }
-      if (I->getSUnit()->Cycle < LiveRegCycles[I->getReg()])
-        LiveRegCycles[I->getReg()] = I->getSUnit()->Cycle;
+      if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
+        LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
     }
   }
 
-  SU->Cycle = 0;
+  SU->setHeightDirty();
   SU->isScheduled = false;
   SU->isAvailable = true;
   AvailableQueue->push(SU);
@@ -443,9 +446,6 @@
     } else {
       LoadSU = CreateNewSUnit(LoadNode);
       LoadNode->setNodeId(LoadSU->NodeNum);
-
-      LoadSU->Depth = SU->Depth;
-      LoadSU->Height = SU->Height;
       ComputeLatency(LoadSU);
     }
 
@@ -462,9 +462,6 @@
     }
     if (TID.isCommutable())
       NewSU->isCommutable = true;
-    // FIXME: Calculate height / depth and propagate the changes?
-    NewSU->Depth = SU->Depth;
-    NewSU->Height = SU->Height;
     ComputeLatency(NewSU);
 
     SDep ChainPred;
@@ -548,10 +545,8 @@
   // New SUnit has the exact same predecessors.
   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I)
-    if (!I->isArtificial()) {
+    if (!I->isArtificial())
       AddPred(NewSU, *I);
-      NewSU->Depth = std::max(NewSU->Depth, I->getSUnit()->Depth+1);
-    }
 
   // Only copy scheduled successors. Cut them from old node's successor
   // list and move them over.
@@ -562,7 +557,6 @@
       continue;
     SUnit *SuccSU = I->getSUnit();
     if (SuccSU->isScheduled) {
-      NewSU->Height = std::max(NewSU->Height, SuccSU->Height+1);
       SDep D = *I;
       D.setSUnit(NewSU);
       AddPred(SuccSU, D);
@@ -570,9 +564,8 @@
       DelDeps.push_back(std::make_pair(SuccSU, D));
     }
   }
-  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
-  }
 
   AvailableQueue->updateNode(SU);
   AvailableQueue->addNode(NewSU);
@@ -590,8 +583,6 @@
   SUnit *CopyFromSU = CreateNewSUnit(NULL);
   CopyFromSU->CopySrcRC = SrcRC;
   CopyFromSU->CopyDstRC = DestRC;
-  CopyFromSU->Depth = SU->Depth;
-  CopyFromSU->Height = SU->Height;
 
   SUnit *CopyToSU = CreateNewSUnit(NULL);
   CopyToSU->CopySrcRC = DestRC;
@@ -870,7 +861,8 @@
   DOUT << "*** Scheduling [" << CurCycle << "]: ";
   DEBUG(SU->dump(this));
 
-  SU->Cycle = CurCycle;
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
   Sequence.push_back(SU);
 
   // Top down: release successors
@@ -1107,19 +1099,19 @@
 /// closestSucc - Returns the scheduled cycle of the successor which is
 /// closet to the current cycle.
 static unsigned closestSucc(const SUnit *SU) {
-  unsigned MaxCycle = 0;
+  unsigned MaxHeight = 0;
   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
-    unsigned Cycle = I->getSUnit()->Cycle;
+    unsigned Height = I->getSUnit()->getHeight();
     // If there are bunch of CopyToRegs stacked up, they should be considered
     // to be at the same position.
     if (I->getSUnit()->getNode() &&
         I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
-      Cycle = closestSucc(I->getSUnit())+1;
-    if (Cycle > MaxCycle)
-      MaxCycle = Cycle;
+      Height = closestSucc(I->getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
   }
-  return MaxCycle;
+  return MaxHeight;
 }
 
 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
@@ -1182,11 +1174,11 @@
   if (LScratch != RScratch)
     return LScratch > RScratch;
 
-  if (left->Height != right->Height)
-    return left->Height > right->Height;
+  if (left->getHeight() != right->getHeight())
+    return left->getHeight() > right->getHeight();
   
-  if (left->Depth != right->Depth)
-    return left->Depth < right->Depth;
+  if (left->getDepth() != right->getDepth())
+    return left->getDepth() < right->getDepth();
 
   assert(left->NodeQueueId && right->NodeQueueId && 
          "NodeQueueId cannot be zero");
@@ -1294,7 +1286,8 @@
           continue;
         // Be conservative. Ignore if nodes aren't at roughly the same
         // depth and height.
-        if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1)
+        if (SuccSU->getHeight() < SU->getHeight() &&
+            (SU->getHeight() - SuccSU->getHeight()) > 1)
           continue;
         if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
           continue;
@@ -1384,8 +1377,8 @@
   if (LPriority+LBonus != RPriority+RBonus)
     return LPriority+LBonus < RPriority+RBonus;
 
-  if (left->Depth != right->Depth)
-    return left->Depth < right->Depth;
+  if (left->getDepth() != right->getDepth())
+    return left->getDepth() < right->getDepth();
 
   if (left->NumSuccsLeft != right->NumSuccsLeft)
     return left->NumSuccsLeft > right->NumSuccsLeft;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 69d3045..21df142 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -80,6 +80,9 @@
        E = DAG->allnodes_end(); NI != E; ++NI)
     NI->setNodeId(-1);
 
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
   for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
        E = DAG->allnodes_end(); NI != E; ++NI) {
     if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
@@ -133,7 +136,10 @@
     N->setNodeId(NodeSUnit->NodeNum);
 
     // Assign the Latency field of NodeSUnit using target-provided information.
-    ComputeLatency(NodeSUnit);
+    if (UnitLatencies)
+      NodeSUnit->Latency = 1;
+    else
+      ComputeLatency(NodeSUnit);
   }
   
   // Pass 2: add the preds, succs, etc.