Experimental post-pass scheduling support. Post-pass scheduling
is currently off by default, and can be enabled with
-disable-post-RA-scheduler=false.

This doesn't have a significant impact on most code yet because it doesn't
yet do anything to address anti-dependencies and it doesn't attempt to
disambiguate memory references. Also, several popular targets
don't have pipeline descriptions yet.

The majority of the changes here are splitting the SelectionDAG-specific
code out of ScheduleDAG, so that ScheduleDAG can be moved to
libLLVMCodeGen.a. The interface between ScheduleDAG-using code and
the rest of the scheduling code is somewhat rough and will evolve.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@59676 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index a7aad33..fed3237 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -8,6 +8,7 @@
   IfConversion.cpp
   IntrinsicLowering.cpp
   LLVMTargetMachine.cpp
+  LatencyPriorityQueue.cpp
   LiveInterval.cpp
   LiveIntervalAnalysis.cpp
   LiveStackAnalysis.cpp
@@ -40,6 +41,10 @@
   RegAllocSimple.cpp
   RegisterCoalescer.cpp
   RegisterScavenging.cpp
+  ScheduleDAG.cpp
+  ScheduleDAGEmit.cpp
+  ScheduleDAGInstrs.cpp
+  ScheduleDAGPrinter.cpp
   ShadowStackGC.cpp
   SimpleRegisterCoalescing.cpp
   StackProtector.cpp
diff --git a/lib/CodeGen/SelectionDAG/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
similarity index 98%
rename from lib/CodeGen/SelectionDAG/LatencyPriorityQueue.cpp
rename to lib/CodeGen/LatencyPriorityQueue.cpp
index ae73f20..70b6574 100644
--- a/lib/CodeGen/SelectionDAG/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -14,7 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "scheduler"
-#include "LatencyPriorityQueue.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 41981d3..3a31eaa 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -20,16 +20,22 @@
 
 #define DEBUG_TYPE "post-RA-sched"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
 namespace {
-  class VISIBILITY_HIDDEN SchedulePostRATDList : public MachineFunctionPass {
+  class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
   public:
     static char ID;
-    SchedulePostRATDList() : MachineFunctionPass(&ID) {}
+    PostRAScheduler() : MachineFunctionPass(&ID) {}
   private:
     MachineFunction *MF;
     const TargetMachine *TM;
@@ -40,27 +46,208 @@
 
     bool runOnMachineFunction(MachineFunction &Fn);
   };
-  char SchedulePostRATDList::ID = 0;
+  char PostRAScheduler::ID = 0;
+
+  class VISIBILITY_HIDDEN SchedulePostRATDList : public ScheduleDAGInstrs {
+  public:
+    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm)
+      : ScheduleDAGInstrs(mbb, tm) {}
+  private:
+    MachineFunction *MF;
+    const TargetMachine *TM;
+
+    /// AvailableQueue - The priority queue to use for the available SUnits.
+    ///
+    LatencyPriorityQueue AvailableQueue;
+  
+    /// PendingQueue - This contains all of the instructions whose operands have
+    /// been issued, but their results are not ready yet (due to the latency of
+    /// the operation).  Once the operands becomes available, the instruction is
+    /// added to the AvailableQueue.
+    std::vector<SUnit*> PendingQueue;
+
+  public:
+    const char *getPassName() const {
+      return "Post RA top-down list latency scheduler (STUB)";
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+
+    void Schedule();
+
+  private:
+    void ReleaseSucc(SUnit *SU, SUnit *SuccSU, bool isChain);
+    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+    void ListScheduleTopDown();
+  };
 }
 
-bool SchedulePostRATDList::runOnMachineFunction(MachineFunction &Fn) {
-  DOUT << "SchedulePostRATDList\n";
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+  DOUT << "PostRAScheduler\n";
   MF = &Fn;
   TM = &MF->getTarget();
 
   // Loop over all of the basic blocks
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
-       MBB != MBBe; ++MBB)
-    ;
+       MBB != MBBe; ++MBB) {
+
+    SchedulePostRATDList Scheduler(MBB, *TM);
+
+    Scheduler.Run();
+
+    Scheduler.EmitSchedule();
+  }
 
   return true;
 }
   
+/// Schedule - Schedule the DAG using list scheduling.
+void SchedulePostRATDList::Schedule() {
+  DOUT << "********** List Scheduling **********\n";
+  
+  // Build scheduling units.
+  BuildSchedUnits();
+
+  AvailableQueue.initNodes(SUnits);
+  
+  ListScheduleTopDown();
+  
+  AvailableQueue.releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SUnit *SuccSU, bool isChain) {
+  --SuccSU->NumPredsLeft;
+  
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft < 0) {
+    cerr << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    cerr << " has been released too many times!\n";
+    assert(0);
+  }
+#endif
+  
+  // Compute how many cycles it will be before this actually becomes
+  // available.  This is the max of the start time of all predecessors plus
+  // their latencies.
+  // If this is a token edge, we don't need to wait for the latency of the
+  // preceeding instruction (e.g. a long-latency load) unless there is also
+  // some other data dependence.
+  unsigned PredDoneCycle = SU->Cycle;
+  if (!isChain)
+    PredDoneCycle += SU->Latency;
+  else if (SU->Latency)
+    PredDoneCycle += 1;
+  SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
+  
+  if (SuccSU->NumPredsLeft == 0) {
+    PendingQueue.push_back(SuccSU);
+  }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(SU->dump(this));
+  
+  Sequence.push_back(SU);
+  SU->Cycle = CurCycle;
+
+  // Top down: release successors.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I)
+    ReleaseSucc(SU, I->Dep, I->isCtrl);
+
+  SU->isScheduled = true;
+  AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // All leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    if (SUnits[i].Preds.empty()) {
+      AvailableQueue.push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+  
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready put it back.  Schedule the node.
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue.  If
+    // so, add them to the available queue.
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i]->CycleBound == CurCycle) {
+        AvailableQueue.push(PendingQueue[i]);
+        PendingQueue[i]->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else {
+        assert(PendingQueue[i]->CycleBound > CurCycle && "Negative latency?");
+      }
+    }
+    
+    // If there are no instructions available, don't try to issue anything, and
+    // don't advance the hazard recognizer.
+    if (AvailableQueue.empty()) {
+      ++CurCycle;
+      continue;
+    }
+
+    SUnit *FoundSUnit = AvailableQueue.pop();
+    
+    // If we found a node to schedule, do it now.
+    if (FoundSUnit) {
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+
+      // If this is a pseudo-op node, we don't want to increment the current
+      // cycle.
+      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
+        ++CurCycle;        
+    } else {
+      // Otherwise, we have a pipeline stall, but no other problem, just advance
+      // the current cycle and try again.
+      DOUT << "*** Advancing cycle, no work to do\n";
+      ++NumStalls;
+      ++CurCycle;
+    }
+  }
+
+#ifndef NDEBUG
+  // Verify that all SUnits were scheduled.
+  bool AnyNotSched = false;
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    if (SUnits[i].NumPredsLeft != 0) {
+      if (!AnyNotSched)
+        cerr << "*** List scheduling failed! ***\n";
+      SUnits[i].dump(this);
+      cerr << "has not been scheduled!\n";
+      AnyNotSched = true;
+    }
+  }
+  assert(!AnyNotSched);
+#endif
+}
 
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
 
 FunctionPass *llvm::createPostRAScheduler() {
-  return new SchedulePostRATDList();
+  return new PostRAScheduler();
 }
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 0000000..e6a8d13
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,210 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+ScheduleDAG::ScheduleDAG(SelectionDAG *dag, MachineBasicBlock *bb,
+                         const TargetMachine &tm)
+  : DAG(dag), BB(bb), TM(tm), MRI(BB->getParent()->getRegInfo()) {
+  TII = TM.getInstrInfo();
+  MF  = BB->getParent();
+  TRI = TM.getRegisterInfo();
+  TLI = TM.getTargetLowering();
+  ConstPool = MF->getConstantPool();
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// CalculateDepths - compute depths using algorithms for the longest
+/// paths in the DAG
+void ScheduleDAG::CalculateDepths() {
+  unsigned DAGSize = SUnits.size();
+  std::vector<SUnit*> WorkList;
+  WorkList.reserve(DAGSize);
+
+  // Initialize the data structures
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    unsigned Degree = SU->Preds.size();
+    // Temporarily use the Depth field as scratch space for the degree count.
+    SU->Depth = Degree;
+
+    // Is it a node without dependencies?
+    if (Degree == 0) {
+        assert(SU->Preds.empty() && "SUnit should have no predecessors");
+        // Collect leaf nodes
+        WorkList.push_back(SU);
+    }
+  }
+
+  // Process nodes in the topological order
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    unsigned SUDepth = 0;
+
+    // Use dynamic programming:
+    // When current node is being processed, all of its dependencies
+    // are already processed.
+    // So, just iterate over all predecessors and take the longest path
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      unsigned PredDepth = I->Dep->Depth;
+      if (PredDepth+1 > SUDepth) {
+          SUDepth = PredDepth + 1;
+      }
+    }
+
+    SU->Depth = SUDepth;
+
+    // Update degrees of all nodes depending on current SUnit
+    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+         I != E; ++I) {
+      SUnit *SU = I->Dep;
+      if (!--SU->Depth)
+        // If all dependencies of the node are processed already,
+        // then the longest path for the node can be computed now
+        WorkList.push_back(SU);
+    }
+  }
+}
+
+/// CalculateHeights - compute heights using algorithms for the longest
+/// paths in the DAG
+void ScheduleDAG::CalculateHeights() {
+  unsigned DAGSize = SUnits.size();
+  std::vector<SUnit*> WorkList;
+  WorkList.reserve(DAGSize);
+
+  // Initialize the data structures
+  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    unsigned Degree = SU->Succs.size();
+    // Temporarily use the Height field as scratch space for the degree count.
+    SU->Height = Degree;
+
+    // Is it a node without dependencies?
+    if (Degree == 0) {
+        assert(SU->Succs.empty() && "Something wrong");
+        assert(WorkList.empty() && "Should be empty");
+        // Collect leaf nodes
+        WorkList.push_back(SU);
+    }
+  }
+
+  // Process nodes in the topological order
+  while (!WorkList.empty()) {
+    SUnit *SU = WorkList.back();
+    WorkList.pop_back();
+    unsigned SUHeight = 0;
+
+    // Use dynamic programming:
+    // When current node is being processed, all of its dependencies
+    // are already processed.
+    // So, just iterate over all successors and take the longest path
+    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+         I != E; ++I) {
+      unsigned SuccHeight = I->Dep->Height;
+      if (SuccHeight+1 > SUHeight) {
+          SUHeight = SuccHeight + 1;
+      }
+    }
+
+    SU->Height = SUHeight;
+
+    // Update degrees of all nodes depending on current SUnit
+    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+         I != E; ++I) {
+      SUnit *SU = I->Dep;
+      if (!--SU->Height)
+        // If all dependencies of the node are processed already,
+        // then the longest path for the node can be computed now
+        WorkList.push_back(SU);
+    }
+  }
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(this);
+    else
+      cerr << "**** NOOP ****\n";
+  }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run() {
+  Schedule();
+  
+  DOUT << "*** Final schedule ***\n";
+  DEBUG(dumpSchedule());
+  DOUT << "\n";
+}
+
+/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
+/// a group of nodes flagged together.
+void SUnit::dump(const ScheduleDAG *G) const {
+  cerr << "SU(" << NodeNum << "): ";
+  G->dumpNode(this);
+}
+
+void SUnit::dumpAll(const ScheduleDAG *G) const {
+  dump(G);
+
+  cerr << "  # preds left       : " << NumPredsLeft << "\n";
+  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
+  cerr << "  Latency            : " << Latency << "\n";
+  cerr << "  Depth              : " << Depth << "\n";
+  cerr << "  Height             : " << Height << "\n";
+
+  if (Preds.size() != 0) {
+    cerr << "  Predecessors:\n";
+    for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
+         I != E; ++I) {
+      if (I->isCtrl)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
+      if (I->isSpecial)
+        cerr << " *";
+      cerr << "\n";
+    }
+  }
+  if (Succs.size() != 0) {
+    cerr << "  Successors:\n";
+    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
+         I != E; ++I) {
+      if (I->isCtrl)
+        cerr << "   ch  #";
+      else
+        cerr << "   val #";
+      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
+      if (I->isSpecial)
+        cerr << " *";
+      cerr << "\n";
+    }
+  }
+  cerr << "\n";
+}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
new file mode 100644
index 0000000..ce3283d
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -0,0 +1,72 @@
+//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the ScheduleDAG class, which creates
+// MachineInstrs according to the computed schedule.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) {
+  MI->addMemOperand(*MF, MO);
+}
+
+void ScheduleDAG::EmitNoop() {
+  TII->insertNoop(*BB, BB->end());
+}
+
+void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
+                                  DenseMap<SUnit*, unsigned> &VRBaseMap) {
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl) continue;  // ignore chain preds
+    if (I->Dep->CopyDstRC) {
+      // Copy to physical register.
+      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->Dep);
+      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+      // Find the destination physical register.
+      unsigned Reg = 0;
+      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+             EE = SU->Succs.end(); II != EE; ++II) {
+        if (I->Reg) {
+          Reg = I->Reg;
+          break;
+        }
+      }
+      assert(I->Reg && "Unknown physical register!");
+      TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
+                        SU->CopyDstRC, SU->CopySrcRC);
+    } else {
+      // Copy from physical register.
+      assert(I->Reg && "Unknown physical register!");
+      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+      isNew = isNew; // Silence compiler warning.
+      assert(isNew && "Node emitted out of order - early");
+      TII->copyRegToReg(*BB, BB->end(), VRBase, I->Reg,
+                        SU->CopyDstRC, SU->CopySrcRC);
+    }
+    break;
+  }
+}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
new file mode 100644
index 0000000..72008c9
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -0,0 +1,135 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched-instrs"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
+                                     const TargetMachine &tm)
+  : ScheduleDAG(0, bb, tm) {}
+
+void ScheduleDAGInstrs::BuildSchedUnits() {
+  SUnits.clear();
+  SUnits.reserve(BB->size());
+
+  std::vector<SUnit *> PendingLoads;
+  SUnit *Terminator = 0;
+  SUnit *Chain = 0;
+  SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
+  std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {};
+  int Cost = 1; // FIXME
+
+  for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
+       MII != MIE; --MII) {
+    MachineInstr *MI = prior(MII);
+    SUnit *SU = NewSUnit(MI);
+
+    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
+      const MachineOperand &MO = MI->getOperand(j);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+
+      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+      std::vector<SUnit *> &UseList = Uses[Reg];
+      SUnit *&Def = Defs[Reg];
+      // Optionally add output and anti dependences
+      if (Def && Def != SU)
+        Def->addPred(SU, /*isCtrl=*/true, /*isSpecial=*/false,
+                     /*PhyReg=*/Reg, Cost);
+      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+        SUnit *&Def = Defs[*Alias];
+        if (Def && Def != SU)
+          Def->addPred(SU, /*isCtrl=*/true, /*isSpecial=*/false,
+                       /*PhyReg=*/*Alias, Cost);
+      }
+
+      if (MO.isDef()) {
+        // Add any data dependencies.
+        for (unsigned i = 0, e = UseList.size(); i != e; ++i)
+          if (UseList[i] != SU)
+            UseList[i]->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false,
+                                /*PhysReg=*/Reg, Cost);
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          std::vector<SUnit *> &UseList = Uses[*Alias];
+          for (unsigned i = 0, e = UseList.size(); i != e; ++i)
+            if (UseList[i] != SU)
+              UseList[i]->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false,
+                                  /*PhysReg=*/*Alias, Cost);
+        }
+
+        UseList.clear();
+        Def = SU;
+      } else {
+        UseList.push_back(SU);
+      }
+    }
+    bool False = false;
+    bool True = true;
+    if (!MI->isSafeToMove(TII, False)) {
+      if (Chain)
+        Chain->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
+      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+        PendingLoads[k]->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
+      PendingLoads.clear();
+      Chain = SU;
+    } else if (!MI->isSafeToMove(TII, True)) {
+      if (Chain)
+        Chain->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
+      PendingLoads.push_back(SU);
+    }
+    if (Terminator && SU->Succs.empty())
+      Terminator->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
+    if (MI->getDesc().isTerminator())
+      Terminator = SU;
+  }
+}
+
+void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+  SU->getInstr()->dump();
+}
+
+std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
+  std::string s;
+  raw_string_ostream oss(s);
+  SU->getInstr()->print(oss);
+  return oss.str();
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
+  // For MachineInstr-based scheduling, we're rescheduling the instructions in
+  // the block, so start by removing them from the block.
+  while (!BB->empty())
+    BB->remove(BB->begin());
+
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    SUnit *SU = Sequence[i];
+    if (!SU) {
+      // Null SUnit* is a noop.
+      EmitNoop();
+      continue;
+    }
+
+    BB->push_back(SU->getInstr());
+  }
+
+  return BB;
+}
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
new file mode 100644
index 0000000..b5d030c
--- /dev/null
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -0,0 +1,93 @@
+//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+#include <fstream>
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
+    static std::string getGraphName(const ScheduleDAG *G) {
+      return G->MF->getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+    
+    static bool hasNodeAddressLabel(const SUnit *Node,
+                                    const ScheduleDAG *Graph) {
+      return true;
+    }
+    
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
+      if (EI.isSpecialDep())
+        return "color=cyan,style=dashed";
+      if (EI.isCtrlDep())
+        return "color=blue,style=dashed";
+      return "";
+    }
+    
+
+    static std::string getNodeLabel(const SUnit *Node,
+                                    const ScheduleDAG *Graph);
+    static std::string getNodeAttributes(const SUnit *N,
+                                         const ScheduleDAG *Graph) {
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(ScheduleDAG *G,
+                                       GraphWriter<ScheduleDAG*> &GW) {
+      return G->addCustomGraphFeatures(GW);
+    }
+  };
+}
+
+std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
+                                                       const ScheduleDAG *G) {
+  return G->getGraphNodeLabel(SU);
+}
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAG::viewGraph() {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + MF->getFunction()->getName(),
+            "Scheduling-Units Graph for " + MF->getFunction()->getName() + ':' +
+            BB->getBasicBlock()->getName());
+#else
+  cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index a3654c2..186e3d1 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -2,15 +2,14 @@
   CallingConvLower.cpp
   DAGCombiner.cpp
   FastISel.cpp
-  LatencyPriorityQueue.cpp
   LegalizeDAG.cpp
   LegalizeFloatTypes.cpp
   LegalizeIntegerTypes.cpp
   LegalizeTypes.cpp
   LegalizeTypesGeneric.cpp
   LegalizeVectorTypes.cpp
-  ScheduleDAG.cpp
-  ScheduleDAGEmit.cpp
+  ScheduleDAGSDNodes.cpp
+  ScheduleDAGSDNodesEmit.cpp
   ScheduleDAGFast.cpp
   ScheduleDAGList.cpp
   ScheduleDAGRRList.cpp
diff --git a/lib/CodeGen/SelectionDAG/LatencyPriorityQueue.h b/lib/CodeGen/SelectionDAG/LatencyPriorityQueue.h
deleted file mode 100644
index 5b2b02b..0000000
--- a/lib/CodeGen/SelectionDAG/LatencyPriorityQueue.h
+++ /dev/null
@@ -1,124 +0,0 @@
-//===---- LatencyPriorityQueue.h - A latency-oriented priority queue ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the LatencyPriorityQueue class, which is a
-// SchedulingPriorityQueue that schedules using latency information to
-// reduce the length of the critical path through the basic block.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LATENCY_PRIORITY_QUEUE_H
-#define LATENCY_PRIORITY_QUEUE_H
-
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/ADT/PriorityQueue.h"
-
-namespace llvm {
-  class LatencyPriorityQueue;
-  
-  /// Sorting functions for the Available queue.
-  struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
-    LatencyPriorityQueue *PQ;
-    explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
-    
-    bool operator()(const SUnit* left, const SUnit* right) const;
-  };
-
-  class LatencyPriorityQueue : public SchedulingPriorityQueue {
-    // SUnits - The SUnits for the current graph.
-    std::vector<SUnit> *SUnits;
-    
-    // Latencies - The latency (max of latency from this node to the bb exit)
-    // for each node.
-    std::vector<int> Latencies;
-
-    /// NumNodesSolelyBlocking - This vector contains, for every node in the
-    /// Queue, the number of nodes that the node is the sole unscheduled
-    /// predecessor for.  This is used as a tie-breaker heuristic for better
-    /// mobility.
-    std::vector<unsigned> NumNodesSolelyBlocking;
-
-    PriorityQueue<SUnit*, std::vector<SUnit*>, latency_sort> Queue;
-  public:
-    LatencyPriorityQueue() : Queue(latency_sort(this)) {
-    }
-    
-    void initNodes(std::vector<SUnit> &sunits) {
-      SUnits = &sunits;
-      // Calculate node priorities.
-      CalculatePriorities();
-    }
-
-    void addNode(const SUnit *SU) {
-      Latencies.resize(SUnits->size(), -1);
-      NumNodesSolelyBlocking.resize(SUnits->size(), 0);
-      CalcLatency(*SU);
-    }
-
-    void updateNode(const SUnit *SU) {
-      Latencies[SU->NodeNum] = -1;
-      CalcLatency(*SU);
-    }
-
-    void releaseState() {
-      SUnits = 0;
-      Latencies.clear();
-    }
-    
-    unsigned getLatency(unsigned NodeNum) const {
-      assert(NodeNum < Latencies.size());
-      return Latencies[NodeNum];
-    }
-    
-    unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
-      assert(NodeNum < NumNodesSolelyBlocking.size());
-      return NumNodesSolelyBlocking[NodeNum];
-    }
-    
-    unsigned size() const { return Queue.size(); }
-
-    bool empty() const { return Queue.empty(); }
-    
-    virtual void push(SUnit *U) {
-      push_impl(U);
-    }
-    void push_impl(SUnit *U);
-    
-    void push_all(const std::vector<SUnit *> &Nodes) {
-      for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
-        push_impl(Nodes[i]);
-    }
-    
-    SUnit *pop() {
-      if (empty()) return NULL;
-      SUnit *V = Queue.top();
-      Queue.pop();
-      return V;
-    }
-
-    void remove(SUnit *SU) {
-      assert(!Queue.empty() && "Not in queue!");
-      Queue.erase_one(SU);
-    }
-
-    // ScheduledNode - As nodes are scheduled, we look to see if there are any
-    // successor nodes that have a single unscheduled predecessor.  If so, that
-    // single predecessor has a higher priority, since scheduling it will make
-    // the node available.
-    void ScheduledNode(SUnit *Node);
-
-  private:
-    void CalculatePriorities();
-    int CalcLatency(const SUnit &SU);
-    void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
-    SUnit *getSingleUnscheduledPred(SUnit *SU);
-  };
-}
-
-#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
deleted file mode 100644
index c2e2175..0000000
--- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp
+++ /dev/null
@@ -1,522 +0,0 @@
-//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements the ScheduleDAG class, which is a base class used by
-// scheduling implementation classes.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-ScheduleDAG::ScheduleDAG(SelectionDAG *dag, MachineBasicBlock *bb,
-                         const TargetMachine &tm)
-  : DAG(dag), BB(bb), TM(tm), MRI(BB->getParent()->getRegInfo()) {
-  TII = TM.getInstrInfo();
-  MF  = BB->getParent();
-  TRI = TM.getRegisterInfo();
-  TLI = TM.getTargetLowering();
-  ConstPool = MF->getConstantPool();
-}
-
-/// CheckForPhysRegDependency - Check if the dependency between def and use of
-/// a specified operand is a physical register dependency. If so, returns the
-/// register and the cost of copying the register.
-static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
-                                      const TargetRegisterInfo *TRI, 
-                                      const TargetInstrInfo *TII,
-                                      unsigned &PhysReg, int &Cost) {
-  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
-    return;
-
-  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
-  if (TargetRegisterInfo::isVirtualRegister(Reg))
-    return;
-
-  unsigned ResNo = User->getOperand(2).getResNo();
-  if (Def->isMachineOpcode()) {
-    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
-    if (ResNo >= II.getNumDefs() &&
-        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
-      PhysReg = Reg;
-      const TargetRegisterClass *RC =
-        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
-      Cost = RC->getCopyCost();
-    }
-  }
-}
-
-SUnit *ScheduleDAG::Clone(SUnit *Old) {
-  SUnit *SU = NewSUnit(Old->getNode());
-  SU->OrigNode = Old->OrigNode;
-  SU->Latency = Old->Latency;
-  SU->isTwoAddress = Old->isTwoAddress;
-  SU->isCommutable = Old->isCommutable;
-  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
-  return SU;
-}
-
-
-/// BuildSchedUnits - Build SUnits from the selection dag that we are input.
-/// This SUnit graph is similar to the SelectionDAG, but represents flagged
-/// together nodes with a single SUnit.
-void ScheduleDAG::BuildSchedUnits() {
-  // For post-regalloc scheduling, build the SUnits from the MachineInstrs
-  // in the MachineBasicBlock.
-  if (!DAG) {
-    BuildSchedUnitsFromMBB();
-    return;
-  }
-
-  // Reserve entries in the vector for each of the SUnits we are creating.  This
-  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
-  // invalidated.
-  SUnits.reserve(DAG->allnodes_size());
-  
-  // During scheduling, the NodeId field of SDNode is used to map SDNodes
-  // to their associated SUnits by holding SUnits table indices. A value
-  // of -1 means the SDNode does not yet have an associated SUnit.
-  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
-       E = DAG->allnodes_end(); NI != E; ++NI)
-    NI->setNodeId(-1);
-
-  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
-       E = DAG->allnodes_end(); NI != E; ++NI) {
-    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
-      continue;
-    
-    // If this node has already been processed, stop now.
-    if (NI->getNodeId() != -1) continue;
-    
-    SUnit *NodeSUnit = NewSUnit(NI);
-    
-    // See if anything is flagged to this node, if so, add them to flagged
-    // nodes.  Nodes can have at most one flag input and one flag output.  Flags
-    // are required the be the last operand and result of a node.
-    
-    // Scan up to find flagged preds.
-    SDNode *N = NI;
-    if (N->getNumOperands() &&
-        N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
-      do {
-        N = N->getOperand(N->getNumOperands()-1).getNode();
-        assert(N->getNodeId() == -1 && "Node already inserted!");
-        N->setNodeId(NodeSUnit->NodeNum);
-      } while (N->getNumOperands() &&
-               N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
-    }
-    
-    // Scan down to find any flagged succs.
-    N = NI;
-    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
-      SDValue FlagVal(N, N->getNumValues()-1);
-      
-      // There are either zero or one users of the Flag result.
-      bool HasFlagUse = false;
-      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
-           UI != E; ++UI)
-        if (FlagVal.isOperandOf(*UI)) {
-          HasFlagUse = true;
-          assert(N->getNodeId() == -1 && "Node already inserted!");
-          N->setNodeId(NodeSUnit->NodeNum);
-          N = *UI;
-          break;
-        }
-      if (!HasFlagUse) break;
-    }
-    
-    // If there are flag operands involved, N is now the bottom-most node
-    // of the sequence of nodes that are flagged together.
-    // Update the SUnit.
-    NodeSUnit->setNode(N);
-    assert(N->getNodeId() == -1 && "Node already inserted!");
-    N->setNodeId(NodeSUnit->NodeNum);
-
-    ComputeLatency(NodeSUnit);
-  }
-  
-  // Pass 2: add the preds, succs, etc.
-  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
-    SUnit *SU = &SUnits[su];
-    SDNode *MainNode = SU->getNode();
-    
-    if (MainNode->isMachineOpcode()) {
-      unsigned Opc = MainNode->getMachineOpcode();
-      const TargetInstrDesc &TID = TII->get(Opc);
-      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
-        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
-          SU->isTwoAddress = true;
-          break;
-        }
-      }
-      if (TID.isCommutable())
-        SU->isCommutable = true;
-    }
-    
-    // Find all predecessors and successors of the group.
-    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
-      if (N->isMachineOpcode() &&
-          TII->get(N->getMachineOpcode()).getImplicitDefs() &&
-          CountResults(N) > TII->get(N->getMachineOpcode()).getNumDefs())
-        SU->hasPhysRegDefs = true;
-      
-      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-        SDNode *OpN = N->getOperand(i).getNode();
-        if (isPassiveNode(OpN)) continue;   // Not scheduled.
-        SUnit *OpSU = &SUnits[OpN->getNodeId()];
-        assert(OpSU && "Node has no SUnit!");
-        if (OpSU == SU) continue;           // In the same group.
-
-        MVT OpVT = N->getOperand(i).getValueType();
-        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
-        bool isChain = OpVT == MVT::Other;
-
-        unsigned PhysReg = 0;
-        int Cost = 1;
-        // Determine if this is a physical register dependency.
-        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
-        SU->addPred(OpSU, isChain, false, PhysReg, Cost);
-      }
-    }
-  }
-}
-
-void ScheduleDAG::BuildSchedUnitsFromMBB() {
-  SUnits.clear();
-  SUnits.reserve(BB->size());
-
-  std::vector<SUnit *> PendingLoads;
-  SUnit *Terminator = 0;
-  SUnit *Chain = 0;
-  SUnit *Defs[TargetRegisterInfo::FirstVirtualRegister] = {};
-  std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister] = {};
-  int Cost = 1; // FIXME
-
-  for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
-       MII != MIE; --MII) {
-    MachineInstr *MI = prior(MII);
-    SUnit *SU = NewSUnit(MI);
-
-    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
-      const MachineOperand &MO = MI->getOperand(j);
-      if (!MO.isReg()) continue;
-      unsigned Reg = MO.getReg();
-      if (Reg == 0) continue;
-
-      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
-      std::vector<SUnit *> &UseList = Uses[Reg];
-      SUnit *&Def = Defs[Reg];
-      // Optionally add output and anti dependences
-      if (Def && Def != SU)
-        Def->addPred(SU, /*isCtrl=*/true, /*isSpecial=*/false,
-                     /*PhyReg=*/Reg, Cost);
-      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-        SUnit *&Def = Defs[*Alias];
-        if (Def && Def != SU)
-          Def->addPred(SU, /*isCtrl=*/true, /*isSpecial=*/false,
-                       /*PhyReg=*/*Alias, Cost);
-      }
-
-      if (MO.isDef()) {
-        // Add any data dependencies.
-        for (unsigned i = 0, e = UseList.size(); i != e; ++i)
-          if (UseList[i] != SU)
-            UseList[i]->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false,
-                                /*PhysReg=*/Reg, Cost);
-        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
-          std::vector<SUnit *> &UseList = Uses[*Alias];
-          for (unsigned i = 0, e = UseList.size(); i != e; ++i)
-            if (UseList[i] != SU)
-              UseList[i]->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false,
-                                  /*PhysReg=*/*Alias, Cost);
-        }
-
-        UseList.clear();
-        Def = SU;
-      } else {
-        UseList.push_back(SU);
-      }
-    }
-    bool False = false;
-    bool True = true;
-    if (!MI->isSafeToMove(TII, False)) {
-      if (Chain)
-        Chain->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
-      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-        PendingLoads[k]->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
-      PendingLoads.clear();
-      Chain = SU;
-    } else if (!MI->isSafeToMove(TII, True)) {
-      if (Chain)
-        Chain->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
-      PendingLoads.push_back(SU);
-    }
-    if (Terminator && SU->Succs.empty())
-      Terminator->addPred(SU, /*isCtrl=*/false, /*isSpecial=*/false);
-    if (MI->getDesc().isTerminator())
-      Terminator = SU;
-  }
-}
-
-void ScheduleDAG::ComputeLatency(SUnit *SU) {
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  
-  // Compute the latency for the node.  We use the sum of the latencies for
-  // all nodes flagged together into this SUnit.
-  if (InstrItins.isEmpty()) {
-    // No latency information.
-    SU->Latency = 1;
-    return;
-  }
-
-  SU->Latency = 0;
-  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
-    if (N->isMachineOpcode()) {
-      unsigned SchedClass = TII->get(N->getMachineOpcode()).getSchedClass();
-      const InstrStage *S = InstrItins.begin(SchedClass);
-      const InstrStage *E = InstrItins.end(SchedClass);
-      for (; S != E; ++S)
-        SU->Latency += S->Cycles;
-    }
-  }
-}
-
-/// CalculateDepths - compute depths using algorithms for the longest
-/// paths in the DAG
-void ScheduleDAG::CalculateDepths() {
-  unsigned DAGSize = SUnits.size();
-  std::vector<SUnit*> WorkList;
-  WorkList.reserve(DAGSize);
-
-  // Initialize the data structures
-  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
-    SUnit *SU = &SUnits[i];
-    unsigned Degree = SU->Preds.size();
-    // Temporarily use the Depth field as scratch space for the degree count.
-    SU->Depth = Degree;
-
-    // Is it a node without dependencies?
-    if (Degree == 0) {
-        assert(SU->Preds.empty() && "SUnit should have no predecessors");
-        // Collect leaf nodes
-        WorkList.push_back(SU);
-    }
-  }
-
-  // Process nodes in the topological order
-  while (!WorkList.empty()) {
-    SUnit *SU = WorkList.back();
-    WorkList.pop_back();
-    unsigned SUDepth = 0;
-
-    // Use dynamic programming:
-    // When current node is being processed, all of its dependencies
-    // are already processed.
-    // So, just iterate over all predecessors and take the longest path
-    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-         I != E; ++I) {
-      unsigned PredDepth = I->Dep->Depth;
-      if (PredDepth+1 > SUDepth) {
-          SUDepth = PredDepth + 1;
-      }
-    }
-
-    SU->Depth = SUDepth;
-
-    // Update degrees of all nodes depending on current SUnit
-    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-         I != E; ++I) {
-      SUnit *SU = I->Dep;
-      if (!--SU->Depth)
-        // If all dependencies of the node are processed already,
-        // then the longest path for the node can be computed now
-        WorkList.push_back(SU);
-    }
-  }
-}
-
-/// CalculateHeights - compute heights using algorithms for the longest
-/// paths in the DAG
-void ScheduleDAG::CalculateHeights() {
-  unsigned DAGSize = SUnits.size();
-  std::vector<SUnit*> WorkList;
-  WorkList.reserve(DAGSize);
-
-  // Initialize the data structures
-  for (unsigned i = 0, e = DAGSize; i != e; ++i) {
-    SUnit *SU = &SUnits[i];
-    unsigned Degree = SU->Succs.size();
-    // Temporarily use the Height field as scratch space for the degree count.
-    SU->Height = Degree;
-
-    // Is it a node without dependencies?
-    if (Degree == 0) {
-        assert(SU->Succs.empty() && "Something wrong");
-        assert(WorkList.empty() && "Should be empty");
-        // Collect leaf nodes
-        WorkList.push_back(SU);
-    }
-  }
-
-  // Process nodes in the topological order
-  while (!WorkList.empty()) {
-    SUnit *SU = WorkList.back();
-    WorkList.pop_back();
-    unsigned SUHeight = 0;
-
-    // Use dynamic programming:
-    // When current node is being processed, all of its dependencies
-    // are already processed.
-    // So, just iterate over all successors and take the longest path
-    for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
-         I != E; ++I) {
-      unsigned SuccHeight = I->Dep->Height;
-      if (SuccHeight+1 > SUHeight) {
-          SUHeight = SuccHeight + 1;
-      }
-    }
-
-    SU->Height = SUHeight;
-
-    // Update degrees of all nodes depending on current SUnit
-    for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-         I != E; ++I) {
-      SUnit *SU = I->Dep;
-      if (!--SU->Height)
-        // If all dependencies of the node are processed already,
-        // then the longest path for the node can be computed now
-        WorkList.push_back(SU);
-    }
-  }
-}
-
-/// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
-/// not go into the resulting MachineInstr).
-unsigned ScheduleDAG::CountResults(SDNode *Node) {
-  unsigned N = Node->getNumValues();
-  while (N && Node->getValueType(N - 1) == MVT::Flag)
-    --N;
-  if (N && Node->getValueType(N - 1) == MVT::Other)
-    --N;    // Skip over chain result.
-  return N;
-}
-
-/// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by special operands that describe memory references, then an
-/// optional chain operand, then an optional flag operand.  Compute the number
-/// of actual operands that will go into the resulting MachineInstr.
-unsigned ScheduleDAG::CountOperands(SDNode *Node) {
-  unsigned N = ComputeMemOperandsEnd(Node);
-  while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode()))
-    --N; // Ignore MEMOPERAND nodes
-  return N;
-}
-
-/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode
-/// operand
-unsigned ScheduleDAG::ComputeMemOperandsEnd(SDNode *Node) {
-  unsigned N = Node->getNumOperands();
-  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
-    --N;
-  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
-    --N; // Ignore chain if it exists.
-  return N;
-}
-
-
-/// dump - dump the schedule.
-void ScheduleDAG::dumpSchedule() const {
-  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
-    if (SUnit *SU = Sequence[i])
-      SU->dump(this);
-    else
-      cerr << "**** NOOP ****\n";
-  }
-}
-
-
-/// Run - perform scheduling.
-///
-void ScheduleDAG::Run() {
-  Schedule();
-  
-  DOUT << "*** Final schedule ***\n";
-  DEBUG(dumpSchedule());
-  DOUT << "\n";
-}
-
-/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
-/// a group of nodes flagged together.
-void SUnit::print(raw_ostream &O, const ScheduleDAG *G) const {
-  O << "SU(" << NodeNum << "): ";
-  if (getNode()) {
-    SmallVector<SDNode *, 4> FlaggedNodes;
-    for (SDNode *N = getNode(); N; N = N->getFlaggedNode())
-      FlaggedNodes.push_back(N);
-    while (!FlaggedNodes.empty()) {
-      O << "    ";
-      FlaggedNodes.back()->print(O, G->DAG);
-      O << "\n";
-      FlaggedNodes.pop_back();
-    }
-  } else {
-    O << "CROSS RC COPY\n";
-  }
-}
-
-void SUnit::dump(const ScheduleDAG *G) const {
-  print(errs(), G);
-}
-
-void SUnit::dumpAll(const ScheduleDAG *G) const {
-  dump(G);
-
-  cerr << "  # preds left       : " << NumPredsLeft << "\n";
-  cerr << "  # succs left       : " << NumSuccsLeft << "\n";
-  cerr << "  Latency            : " << Latency << "\n";
-  cerr << "  Depth              : " << Depth << "\n";
-  cerr << "  Height             : " << Height << "\n";
-
-  if (Preds.size() != 0) {
-    cerr << "  Predecessors:\n";
-    for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
-         I != E; ++I) {
-      if (I->isCtrl)
-        cerr << "   ch  #";
-      else
-        cerr << "   val #";
-      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
-      if (I->isSpecial)
-        cerr << " *";
-      cerr << "\n";
-    }
-  }
-  if (Succs.size() != 0) {
-    cerr << "  Successors:\n";
-    for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
-         I != E; ++I) {
-      if (I->isCtrl)
-        cerr << "   ch  #";
-      else
-        cerr << "   val #";
-      cerr << I->Dep << " - SU(" << I->Dep->NodeNum << ")";
-      if (I->isSpecial)
-        cerr << " *";
-      cerr << "\n";
-    }
-  }
-  cerr << "\n";
-}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0af0358..8f3198e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -12,7 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
@@ -58,7 +58,7 @@
 //===----------------------------------------------------------------------===//
 /// ScheduleDAGFast - The actual "fast" list scheduler implementation.
 ///
-class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAG {
+class VISIBILITY_HIDDEN ScheduleDAGFast : public ScheduleDAGSDNodes {
 private:
   /// AvailableQueue - The priority queue to use for the available SUnits.
   FastPriorityQueue AvailableQueue;
@@ -73,7 +73,7 @@
 public:
   ScheduleDAGFast(SelectionDAG *dag, MachineBasicBlock *bb,
                   const TargetMachine &tm)
-    : ScheduleDAG(dag, bb, tm) {}
+    : ScheduleDAGSDNodes(dag, bb, tm) {}
 
   void Schedule();
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index c2c9110..e1a24a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -19,7 +19,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -30,7 +31,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/Statistic.h"
-#include "LatencyPriorityQueue.h"
 #include <climits>
 using namespace llvm;
 
@@ -46,7 +46,7 @@
 /// ScheduleDAGList - The actual list scheduler implementation.  This supports
 /// top-down scheduling.
 ///
-class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAG {
+class VISIBILITY_HIDDEN ScheduleDAGList : public ScheduleDAGSDNodes {
 private:
   /// AvailableQueue - The priority queue to use for the available SUnits.
   ///
@@ -66,7 +66,7 @@
                   const TargetMachine &tm,
                   SchedulingPriorityQueue *availqueue,
                   HazardRecognizer *HR)
-    : ScheduleDAG(dag, bb, tm),
+    : ScheduleDAGSDNodes(dag, bb, tm),
       AvailableQueue(availqueue), HazardRec(HR) {
     }
 
@@ -212,13 +212,13 @@
         if (!N) break;
         FoundNode = N;
       }
-      
+    
       HazardRecognizer::HazardType HT = HazardRec->getHazardType(FoundNode);
       if (HT == HazardRecognizer::NoHazard) {
         FoundSUnit = CurSUnit;
         break;
       }
-      
+    
       // Remember if this is a noop hazard.
       HasNoopHazards |= HT == HazardRecognizer::NoopHazard;
       
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7c3f6bb..45a1ca0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -16,7 +16,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
@@ -53,7 +53,7 @@
 /// ScheduleDAGRRList - The actual register reduction list scheduler
 /// implementation.  This supports both top-down and bottom-up scheduling.
 ///
-class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAG {
+class VISIBILITY_HIDDEN ScheduleDAGRRList : public ScheduleDAGSDNodes {
 private:
   /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
   /// it is top-down.
@@ -77,7 +77,7 @@
   ScheduleDAGRRList(SelectionDAG *dag, MachineBasicBlock *bb,
                     const TargetMachine &tm, bool isbottomup, bool f,
                     SchedulingPriorityQueue *availqueue)
-    : ScheduleDAG(dag, bb, tm), isBottomUp(isbottomup), Fast(f),
+    : ScheduleDAGSDNodes(dag, bb, tm), isBottomUp(isbottomup), Fast(f),
       AvailableQueue(availqueue) {
     }
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 0000000..9d32d9a
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,257 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(SelectionDAG *dag, MachineBasicBlock *bb,
+                                       const TargetMachine &tm)
+  : ScheduleDAG(dag, bb, tm) {
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+  SUnit *SU = NewSUnit(Old->getNode());
+  SU->OrigNode = Old->OrigNode;
+  SU->Latency = Old->Latency;
+  SU->isTwoAddress = Old->isTwoAddress;
+  SU->isCommutable = Old->isCommutable;
+  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+  return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+                                      const TargetRegisterInfo *TRI, 
+                                      const TargetInstrInfo *TII,
+                                      unsigned &PhysReg, int &Cost) {
+  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+    return;
+
+  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    return;
+
+  unsigned ResNo = User->getOperand(2).getResNo();
+  if (Def->isMachineOpcode()) {
+    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    if (ResNo >= II.getNumDefs() &&
+        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+      PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+      Cost = RC->getCopyCost();
+    }
+  }
+}
+
+/// BuildSchedUnits - Build SUnits from the selection dag that we are input.
+/// This SUnit graph is similar to the SelectionDAG, but represents flagged
+/// together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+  // Reserve entries in the vector for each of the SUnits we are creating.  This
+  // ensure that reallocation of the vector won't happen, so SUnit*'s won't get
+  // invalidated.
+  SUnits.reserve(DAG->allnodes_size());
+  
+  // During scheduling, the NodeId field of SDNode is used to map SDNodes
+  // to their associated SUnits by holding SUnits table indices. A value
+  // of -1 means the SDNode does not yet have an associated SUnit.
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI)
+    NI->setNodeId(-1);
+
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+    
+    // If this node has already been processed, stop now.
+    if (NI->getNodeId() != -1) continue;
+    
+    SUnit *NodeSUnit = NewSUnit(NI);
+    
+    // See if anything is flagged to this node, if so, add them to flagged
+    // nodes.  Nodes can have at most one flag input and one flag output.  Flags
+    // are required the be the last operand and result of a node.
+    
+    // Scan up to find flagged preds.
+    SDNode *N = NI;
+    if (N->getNumOperands() &&
+        N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+      do {
+        N = N->getOperand(N->getNumOperands()-1).getNode();
+        assert(N->getNodeId() == -1 && "Node already inserted!");
+        N->setNodeId(NodeSUnit->NodeNum);
+      } while (N->getNumOperands() &&
+               N->getOperand(N->getNumOperands()-1).getValueType()== MVT::Flag);
+    }
+    
+    // Scan down to find any flagged succs.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
+      SDValue FlagVal(N, N->getNumValues()-1);
+      
+      // There are either zero or one users of the Flag result.
+      bool HasFlagUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); 
+           UI != E; ++UI)
+        if (FlagVal.isOperandOf(*UI)) {
+          HasFlagUse = true;
+          assert(N->getNodeId() == -1 && "Node already inserted!");
+          N->setNodeId(NodeSUnit->NodeNum);
+          N = *UI;
+          break;
+        }
+      if (!HasFlagUse) break;
+    }
+    
+    // If there are flag operands involved, N is now the bottom-most node
+    // of the sequence of nodes that are flagged together.
+    // Update the SUnit.
+    NodeSUnit->setNode(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NodeSUnit->NodeNum);
+
+    ComputeLatency(NodeSUnit);
+  }
+  
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->getNode();
+    
+    if (MainNode->isMachineOpcode()) {
+      unsigned Opc = MainNode->getMachineOpcode();
+      const TargetInstrDesc &TID = TII->get(Opc);
+      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TID.isCommutable())
+        SU->isCommutable = true;
+    }
+    
+    // Find all predecessors and successors of the group.
+    for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+      if (N->isMachineOpcode() &&
+          TII->get(N->getMachineOpcode()).getImplicitDefs() &&
+          CountResults(N) > TII->get(N->getMachineOpcode()).getNumDefs())
+        SU->hasPhysRegDefs = true;
+      
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).getNode();
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = &SUnits[OpN->getNodeId()];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        MVT OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+        SU->addPred(OpSU, isChain, false, PhysReg, Cost);
+      }
+    }
+  }
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  
+  // Compute the latency for the node.  We use the sum of the latencies for
+  // all nodes flagged together into this SUnit.
+  if (InstrItins.isEmpty()) {
+    // No latency information.
+    SU->Latency = 1;
+    return;
+  }
+
+  SU->Latency = 0;
+  for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+    if (N->isMachineOpcode()) {
+      unsigned SchedClass = TII->get(N->getMachineOpcode()).getSchedClass();
+      const InstrStage *S = InstrItins.begin(SchedClass);
+      const InstrStage *E = InstrItins.end(SchedClass);
+      for (; S != E; ++S)
+        SU->Latency += S->Cycles;
+    }
+  }
+}
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional flag operands (which do
+/// not go into the resulting MachineInstr).
+unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Flag)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;    // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by special operands that describe memory references, then an
+/// optional chain operand, then an optional flag operand.  Compute the number
+/// of actual operands that will go into the resulting MachineInstr.
+unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) {
+  unsigned N = ComputeMemOperandsEnd(Node);
+  while (N && isa<MemOperandSDNode>(Node->getOperand(N - 1).getNode()))
+    --N; // Ignore MEMOPERAND nodes
+  return N;
+}
+
+/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode
+/// operand
+unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+  if (SU->getNode())
+    SU->getNode()->dump(DAG);
+  else
+    cerr << "CROSS RC COPY ";
+  cerr << "\n";
+  SmallVector<SDNode *, 4> FlaggedNodes;
+  for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
+    FlaggedNodes.push_back(N);
+  while (!FlaggedNodes.empty()) {
+    cerr << "    ";
+    FlaggedNodes.back()->dump(DAG);
+    cerr << "\n";
+    FlaggedNodes.pop_back();
+  }
+}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
similarity index 89%
rename from lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp
rename to lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index 0c67973..dc9313b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -13,7 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -47,9 +47,9 @@
 
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
-void ScheduleDAG::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
-                                  bool IsClone, unsigned SrcReg,
-                                  DenseMap<SDValue, unsigned> &VRBaseMap) {
+void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+                                         bool IsClone, unsigned SrcReg,
+                                         DenseMap<SDValue, unsigned> &VRBaseMap) {
   unsigned VRBase = 0;
   if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
     // Just use the input register directly!
@@ -142,8 +142,8 @@
 
 /// getDstOfCopyToRegUse - If the only use of the specified result number of
 /// node is a CopyToReg, return its destination register. Return 0 otherwise.
-unsigned ScheduleDAG::getDstOfOnlyCopyToRegUse(SDNode *Node,
-                                               unsigned ResNo) const {
+unsigned ScheduleDAGSDNodes::getDstOfOnlyCopyToRegUse(SDNode *Node,
+                                                      unsigned ResNo) const {
   if (!Node->hasOneUse())
     return 0;
 
@@ -158,7 +158,7 @@
   return 0;
 }
 
-void ScheduleDAG::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+void ScheduleDAGSDNodes::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
                                  const TargetInstrDesc &II,
                                  DenseMap<SDValue, unsigned> &VRBaseMap) {
   assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF &&
@@ -202,8 +202,8 @@
 
 /// getVR - Return the virtual register corresponding to the specified result
 /// of the specified node.
-unsigned ScheduleDAG::getVR(SDValue Op,
-                            DenseMap<SDValue, unsigned> &VRBaseMap) {
+unsigned ScheduleDAGSDNodes::getVR(SDValue Op,
+                                   DenseMap<SDValue, unsigned> &VRBaseMap) {
   if (Op.isMachineOpcode() &&
       Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
     // Add an IMPLICIT_DEF instruction before every use.
@@ -228,10 +228,10 @@
 /// specifies the instruction information for the node, and IIOpNum is the
 /// operand number (in the II) that we are adding. IIOpNum and II are used for 
 /// assertions only.
-void ScheduleDAG::AddOperand(MachineInstr *MI, SDValue Op,
-                             unsigned IIOpNum,
-                             const TargetInstrDesc *II,
-                             DenseMap<SDValue, unsigned> &VRBaseMap) {
+void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
+                                    unsigned IIOpNum,
+                                    const TargetInstrDesc *II,
+                                    DenseMap<SDValue, unsigned> &VRBaseMap) {
   if (Op.isMachineOpcode()) {
     // Note that this case is redundant with the final else block, but we
     // include it because it is the most common and it makes the logic
@@ -328,10 +328,6 @@
   }  
 }
 
-void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) {
-  MI->addMemOperand(*MF, MO);
-}
-
 /// getSubRegisterRegClass - Returns the register class of specified register
 /// class' "SubIdx"'th sub-register class.
 static const TargetRegisterClass*
@@ -361,8 +357,8 @@
 
 /// EmitSubregNode - Generate machine code for subreg nodes.
 ///
-void ScheduleDAG::EmitSubregNode(SDNode *Node, 
-                           DenseMap<SDValue, unsigned> &VRBaseMap) {
+void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, 
+                                        DenseMap<SDValue, unsigned> &VRBaseMap) {
   unsigned VRBase = 0;
   unsigned Opc = Node->getMachineOpcode();
   
@@ -456,8 +452,8 @@
 
 /// EmitNode - Generate machine code for an node and needed dependencies.
 ///
-void ScheduleDAG::EmitNode(SDNode *Node, bool IsClone,
-                           DenseMap<SDValue, unsigned> &VRBaseMap) {
+void ScheduleDAGSDNodes::EmitNode(SDNode *Node, bool IsClone,
+                                  DenseMap<SDValue, unsigned> &VRBaseMap) {
   // If machine instruction
   if (Node->isMachineOpcode()) {
     unsigned Opc = Node->getMachineOpcode();
@@ -634,53 +630,8 @@
   }
 }
 
-void ScheduleDAG::EmitNoop() {
-  TII->insertNoop(*BB, BB->end());
-}
-
-void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
-                                  DenseMap<SUnit*, unsigned> &VRBaseMap) {
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    if (I->isCtrl) continue;  // ignore chain preds
-    if (!I->Dep->getNode()) {
-      // Copy to physical register.
-      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->Dep);
-      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
-      // Find the destination physical register.
-      unsigned Reg = 0;
-      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
-             EE = SU->Succs.end(); II != EE; ++II) {
-        if (I->Reg) {
-          Reg = I->Reg;
-          break;
-        }
-      }
-      assert(I->Reg && "Unknown physical register!");
-      TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
-                        SU->CopyDstRC, SU->CopySrcRC);
-    } else {
-      // Copy from physical register.
-      assert(I->Reg && "Unknown physical register!");
-      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
-      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
-      isNew = isNew; // Silence compiler warning.
-      assert(isNew && "Node emitted out of order - early");
-      TII->copyRegToReg(*BB, BB->end(), VRBase, I->Reg,
-                        SU->CopyDstRC, SU->CopySrcRC);
-    }
-    break;
-  }
-}
-
 /// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAG::EmitSchedule() {
-  // For post-regalloc scheduling, we're rescheduling the instructions in the
-  // block, so start by removing them from the block.
-  if (!DAG)
-    while (!BB->empty())
-      BB->remove(BB->begin());
-
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
   DenseMap<SDValue, unsigned> VRBaseMap;
   DenseMap<SUnit*, unsigned> CopyVRBaseMap;
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -691,13 +642,6 @@
       continue;
     }
 
-    // For post-regalloc scheduling, we already have the instruction;
-    // just append it to the block.
-    if (!DAG) {
-      BB->push_back(SU->getInstr());
-      continue;
-    }
-
     // For pre-regalloc scheduling, create instructions corresponding to the
     // SDNode and any flagged SDNodes and append them to the block.
     SmallVector<SDNode *, 4> FlaggedNodes;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 23822fa..e43b325 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -34,7 +34,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index d748108..03b78c3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -15,7 +15,7 @@
 #include "llvm/Function.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGSDNodes.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -383,71 +383,7 @@
 #endif
 }
 
-namespace llvm {
-  template<>
-  struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits {
-    static std::string getGraphName(const ScheduleDAG *G) {
-      return G->MF->getFunction()->getName();
-    }
-
-    static bool renderGraphFromBottomUp() {
-      return true;
-    }
-    
-    static bool hasNodeAddressLabel(const SUnit *Node,
-                                    const ScheduleDAG *Graph) {
-      return true;
-    }
-    
-    /// If you want to override the dot attributes printed for a particular
-    /// edge, override this method.
-    template<typename EdgeIter>
-    static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
-      if (EI.isSpecialDep())
-        return "color=cyan,style=dashed";
-      if (EI.isCtrlDep())
-        return "color=blue,style=dashed";
-      return "";
-    }
-    
-
-    static std::string getNodeLabel(const SUnit *Node,
-                                    const ScheduleDAG *Graph);
-    static std::string getNodeAttributes(const SUnit *N,
-                                         const ScheduleDAG *Graph) {
-      return "shape=Mrecord";
-    }
-
-    static void addCustomGraphFeatures(ScheduleDAG *G,
-                                       GraphWriter<ScheduleDAG*> &GW) {
-      // Draw a special "GraphRoot" node to indicate the root of the graph.
-      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
-      if (G->DAG) {
-        // For an SDNode-based ScheduleDAG, point to the root of the ScheduleDAG.
-        const SDNode *N = G->DAG->getRoot().getNode();
-        if (N && N->getNodeId() != -1)
-          GW.emitEdge(0, -1, &G->SUnits[N->getNodeId()], -1,
-                      "color=blue,style=dashed");
-      } else {
-        // For a MachineInstr-based ScheduleDAG, find a root to point to.
-        for (unsigned i = 0, e = G->SUnits.size(); i != e; ++i) {
-          if (G->SUnits[i].Succs.empty()) {
-            GW.emitEdge(0, -1, &G->SUnits[i], -1,
-                        "color=blue,style=dashed");
-            break;
-          }
-        }
-      }
-    }
-  };
-}
-
-std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
-                                                       const ScheduleDAG *G) {
-  return G->getGraphNodeLabel(SU);
-}
-
-std::string ScheduleDAG::getGraphNodeLabel(const SUnit *SU) const {
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
   std::string s;
   raw_string_ostream O(s);
   O << "SU(" << SU->NodeNum << "): ";
@@ -467,17 +403,13 @@
   return O.str();
 }
 
-/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
-/// rendered using 'dot'.
-///
-void ScheduleDAG::viewGraph() {
-// This code is only for debugging!
-#ifndef NDEBUG
-  ViewGraph(this, "dag." + MF->getFunction()->getName(),
-            "Scheduling-Units Graph for " + MF->getFunction()->getName() + ':' +
-            BB->getBasicBlock()->getName());
-#else
-  cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
-       << "systems with Graphviz or gv!\n";
-#endif  // NDEBUG
+void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+  if (DAG) {
+    // Draw a special "GraphRoot" node to indicate the root of the graph.
+    GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+    const SDNode *N = DAG->getRoot().getNode();
+    if (N && N->getNodeId() != -1)
+      GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+                  "color=blue,style=dashed");
+  }
 }