llvm/lib/Target/AMDGPU/GCNILPSched.cpp - toolchain/llvm-project - Gitiles

 //===---------------------------- GCNILPSched.cpp - -----------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/CodeGen/ScheduleDAG.h"

 using namespace llvm;

 #define DEBUG_TYPE "machine-scheduler"

 namespace {

 class GCNILPScheduler {
   struct Candidate : ilist_node<Candidate> {
     SUnit *SU;

     Candidate(SUnit *SU_)
       : SU(SU_) {}
   };

   SpecificBumpPtrAllocator<Candidate> Alloc;
   typedef simple_ilist<Candidate> Queue;
   Queue PendingQueue;
   Queue AvailQueue;
   unsigned CurQueueId = 0;

   std::vector<unsigned> SUNumbers;

   /// CurCycle - The current scheduler state corresponds to this cycle.
   unsigned CurCycle = 0;

   unsigned getNodePriority(const SUnit *SU) const;

   const SUnit *pickBest(const SUnit *left, const SUnit *right);
   Candidate* pickCandidate();

   void releasePending();
   void advanceToCycle(unsigned NextCycle);
   void releasePredecessors(const SUnit* SU);

 public:
   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
                                      const ScheduleDAG &DAG);
 };
 } // namespace

 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
 /// Smaller number is the higher priority.
 static unsigned
 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
   if (SethiUllmanNumber != 0)
     return SethiUllmanNumber;

   unsigned Extra = 0;
   for (const SDep &Pred : SU->Preds) {
     if (Pred.isCtrl()) continue;  // ignore chain preds
     SUnit *PredSU = Pred.getSUnit();
     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
     if (PredSethiUllman > SethiUllmanNumber) {
       SethiUllmanNumber = PredSethiUllman;
       Extra = 0;
     }
     else if (PredSethiUllman == SethiUllmanNumber)
       ++Extra;
   }

   SethiUllmanNumber += Extra;

   if (SethiUllmanNumber == 0)
     SethiUllmanNumber = 1;

   return SethiUllmanNumber;
 }

 // Lower priority means schedule further down. For bottom-up scheduling, lower
 // priority SUs are scheduled before higher priority SUs.
 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
   assert(SU->NodeNum < SUNumbers.size());
   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
     // If SU does not have a register use, i.e. it doesn't produce a value
     // that would be consumed (e.g. store), then it terminates a chain of
     // computation.  Give it a large SethiUllman number so it will be
     // scheduled right before its predecessors that it doesn't lengthen
     // their live ranges.
     return 0xffff;

   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
     // If SU does not have a register def, schedule it close to its uses
     // because it does not lengthen any live ranges.
     return 0;

   return SUNumbers[SU->NodeNum];
 }

 /// closestSucc - Returns the scheduled cycle of the successor which is
 /// closest to the current cycle.
 static unsigned closestSucc(const SUnit *SU) {
   unsigned MaxHeight = 0;
   for (const SDep &Succ : SU->Succs) {
     if (Succ.isCtrl()) continue;  // ignore chain succs
     unsigned Height = Succ.getSUnit()->getHeight();
     // If there are bunch of CopyToRegs stacked up, they should be considered
     // to be at the same position.
     if (Height > MaxHeight)
       MaxHeight = Height;
   }
   return MaxHeight;
 }

 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
 /// for scratch registers, i.e. number of data dependencies.
 static unsigned calcMaxScratches(const SUnit *SU) {
   unsigned Scratches = 0;
   for (const SDep &Pred : SU->Preds) {
     if (Pred.isCtrl()) continue;  // ignore chain preds
     Scratches++;
   }
   return Scratches;
 }

 // Return -1 if left has higher priority, 1 if right has higher priority.
 // Return 0 if latency-based priority is equivalent.
 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
   // Scheduling an instruction that uses a VReg whose postincrement has not yet
   // been scheduled will induce a copy. Model this as an extra cycle of latency.
   int LHeight = (int)left->getHeight();
   int RHeight = (int)right->getHeight();

   // If either node is scheduling for latency, sort them by height/depth
   // and latency.

   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
   // is enabled, grouping instructions by cycle, then its height is already
   // covered so only its depth matters. We also reach this point if both stall
   // but have the same height.
   if (LHeight != RHeight)
     return LHeight > RHeight ? 1 : -1;

   int LDepth = left->getDepth();
   int RDepth = right->getDepth();
   if (LDepth != RDepth) {
     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
                       << ") depth " << RDepth << "\n");
     return LDepth < RDepth ? 1 : -1;
   }
   if (left->Latency != right->Latency)
     return left->Latency > right->Latency ? 1 : -1;

   return 0;
 }

 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
 {
   // TODO: add register pressure lowering checks

   bool const DisableSchedCriticalPath = false;
   int MaxReorderWindow = 6;
   if (!DisableSchedCriticalPath) {
     int spread = (int)left->getDepth() - (int)right->getDepth();
     if (std::abs(spread) > MaxReorderWindow) {
       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                         << left->getDepth() << " != SU(" << right->NodeNum
                         << "): " << right->getDepth() << "\n");
       return left->getDepth() < right->getDepth() ? right : left;
     }
   }

   bool const DisableSchedHeight = false;
   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
     int spread = (int)left->getHeight() - (int)right->getHeight();
     if (std::abs(spread) > MaxReorderWindow)
       return left->getHeight() > right->getHeight() ? right : left;
   }

   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
   unsigned LPriority = getNodePriority(left);
   unsigned RPriority = getNodePriority(right);

   if (LPriority != RPriority)
     return LPriority > RPriority ? right : left;

   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
   // e.g.
   // t1 = op t2, c1
   // t3 = op t4, c2
   //
   // and the following instructions are both ready.
   // t2 = op c3
   // t4 = op c4
   //
   // Then schedule t2 = op first.
   // i.e.
   // t4 = op c4
   // t2 = op c3
   // t1 = op t2, c1
   // t3 = op t4, c2
   //
   // This creates more short live intervals.
   unsigned LDist = closestSucc(left);
   unsigned RDist = closestSucc(right);
   if (LDist != RDist)
     return LDist < RDist ? right : left;

   // How many registers becomes live when the node is scheduled.
   unsigned LScratch = calcMaxScratches(left);
   unsigned RScratch = calcMaxScratches(right);
   if (LScratch != RScratch)
     return LScratch > RScratch ? right : left;

   bool const DisableSchedCycles = false;
   if (!DisableSchedCycles) {
     int result = BUCompareLatency(left, right);
     if (result != 0)
       return result > 0 ? right : left;
     return left;
   }
   else {
     if (left->getHeight() != right->getHeight())
       return (left->getHeight() > right->getHeight()) ? right : left;

     if (left->getDepth() != right->getDepth())
       return (left->getDepth() < right->getDepth()) ? right : left;
   }

   assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
 }

 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
   if (AvailQueue.empty())
     return nullptr;
   auto Best = AvailQueue.begin();
   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
     auto NewBestSU = pickBest(Best->SU, I->SU);
     if (NewBestSU != Best->SU) {
       assert(NewBestSU == I->SU);
       Best = I;
     }
   }
   return &*Best;
 }

 void GCNILPScheduler::releasePending() {
   // Check to see if any of the pending instructions are ready to issue.  If
   // so, add them to the available queue.
   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
     auto &C = *I++;
     if (C.SU->getHeight() <= CurCycle) {
       PendingQueue.remove(C);
       AvailQueue.push_back(C);
       C.SU->NodeQueueId = CurQueueId++;
     }
   }
 }

 /// Move the scheduler state forward by the specified number of Cycles.
 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
   if (NextCycle <= CurCycle)
     return;
   CurCycle = NextCycle;
   releasePending();
 }

 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
   for (const auto &PredEdge : SU->Preds) {
     auto PredSU = PredEdge.getSUnit();
     if (PredEdge.isWeak())
       continue;
     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);

     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
   }
 }

 std::vector<const SUnit*>
 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
                           const ScheduleDAG &DAG) {
   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

   std::vector<SUnit> SUSavedCopy;
   SUSavedCopy.resize(SUnits.size());

   // we cannot save only those fields we touch: some of them are private
   // so save units verbatim: this assumes SUnit should have value semantics
   for (const SUnit &SU : SUnits)
     SUSavedCopy[SU.NodeNum] = SU;

   SUNumbers.assign(SUnits.size(), 0);
   for (const SUnit &SU : SUnits)
     CalcNodeSethiUllmanNumber(&SU, SUNumbers);

   for (auto SU : BotRoots) {
     AvailQueue.push_back(
       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
   }
   releasePredecessors(&DAG.ExitSU);

   std::vector<const SUnit*> Schedule;
   Schedule.reserve(SUnits.size());
   while (true) {
     if (AvailQueue.empty() && !PendingQueue.empty()) {
       auto EarliestSU = std::min_element(
         PendingQueue.begin(), PendingQueue.end(),
         [=](const Candidate& C1, const Candidate& C2) {
         return C1.SU->getHeight() < C2.SU->getHeight();
       })->SU;
       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
     }
     if (AvailQueue.empty())
       break;

     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
                          "Ready queue:";
                for (auto &C
                     : AvailQueue) dbgs()
                << ' ' << C.SU->NodeNum;
                dbgs() << '\n';);

     auto C = pickCandidate();
     assert(C);
     AvailQueue.remove(*C);
     auto SU = C->SU;
     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

     advanceToCycle(SU->getHeight());

     releasePredecessors(SU);
     Schedule.push_back(SU);
     SU->isScheduled = true;
   }
   assert(SUnits.size() == Schedule.size());

   std::reverse(Schedule.begin(), Schedule.end());

   // restore units
   for (auto &SU : SUnits)
     SU = SUSavedCopy[SU.NodeNum];

   return Schedule;
 }

 namespace llvm {
 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
                                               const ScheduleDAG &DAG) {
   GCNILPScheduler S;
   return S.schedule(BotRoots, DAG);
 }
 }
	//===---------------------------- GCNILPSched.cpp - -----------------------===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
	//
	//===----------------------------------------------------------------------===//

	#include "llvm/CodeGen/ScheduleDAG.h"

	using namespace llvm;

	#define DEBUG_TYPE "machine-scheduler"

	namespace {

	class GCNILPScheduler {
	struct Candidate : ilist_node<Candidate> {
	SUnit *SU;

	Candidate(SUnit *SU_)
	: SU(SU_) {}
	};

	SpecificBumpPtrAllocator<Candidate> Alloc;
	typedef simple_ilist<Candidate> Queue;
	Queue PendingQueue;
	Queue AvailQueue;
	unsigned CurQueueId = 0;

	std::vector<unsigned> SUNumbers;

	/// CurCycle - The current scheduler state corresponds to this cycle.
	unsigned CurCycle = 0;

	unsigned getNodePriority(const SUnit *SU) const;

	const SUnit pickBest(const SUnit left, const SUnit *right);
	Candidate* pickCandidate();

	void releasePending();
	void advanceToCycle(unsigned NextCycle);
	void releasePredecessors(const SUnit* SU);

	public:
	std::vector<const SUnit> schedule(ArrayRef<const SUnit> TopRoots,
	const ScheduleDAG &DAG);
	};
	} // namespace

	/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
	/// Smaller number is the higher priority.
	static unsigned
	CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
	unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
	if (SethiUllmanNumber != 0)
	return SethiUllmanNumber;

	unsigned Extra = 0;
	for (const SDep &Pred : SU->Preds) {
	if (Pred.isCtrl()) continue; // ignore chain preds
	SUnit *PredSU = Pred.getSUnit();
	unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
	if (PredSethiUllman > SethiUllmanNumber) {
	SethiUllmanNumber = PredSethiUllman;
	Extra = 0;
	}
	else if (PredSethiUllman == SethiUllmanNumber)
	++Extra;
	}

	SethiUllmanNumber += Extra;

	if (SethiUllmanNumber == 0)
	SethiUllmanNumber = 1;

	return SethiUllmanNumber;
	}

	// Lower priority means schedule further down. For bottom-up scheduling, lower
	// priority SUs are scheduled before higher priority SUs.
	unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
	assert(SU->NodeNum < SUNumbers.size());
	if (SU->NumSuccs == 0 && SU->NumPreds != 0)
	// If SU does not have a register use, i.e. it doesn't produce a value
	// that would be consumed (e.g. store), then it terminates a chain of
	// computation. Give it a large SethiUllman number so it will be
	// scheduled right before its predecessors that it doesn't lengthen
	// their live ranges.
	return 0xffff;

	if (SU->NumPreds == 0 && SU->NumSuccs != 0)
	// If SU does not have a register def, schedule it close to its uses
	// because it does not lengthen any live ranges.
	return 0;

	return SUNumbers[SU->NodeNum];
	}

	/// closestSucc - Returns the scheduled cycle of the successor which is
	/// closest to the current cycle.
	static unsigned closestSucc(const SUnit *SU) {
	unsigned MaxHeight = 0;
	for (const SDep &Succ : SU->Succs) {
	if (Succ.isCtrl()) continue; // ignore chain succs
	unsigned Height = Succ.getSUnit()->getHeight();
	// If there are bunch of CopyToRegs stacked up, they should be considered
	// to be at the same position.
	if (Height > MaxHeight)
	MaxHeight = Height;
	}
	return MaxHeight;
	}

	/// calcMaxScratches - Returns an cost estimate of the worse case requirement
	/// for scratch registers, i.e. number of data dependencies.
	static unsigned calcMaxScratches(const SUnit *SU) {
	unsigned Scratches = 0;
	for (const SDep &Pred : SU->Preds) {
	if (Pred.isCtrl()) continue; // ignore chain preds
	Scratches++;
	}
	return Scratches;
	}

	// Return -1 if left has higher priority, 1 if right has higher priority.
	// Return 0 if latency-based priority is equivalent.
	static int BUCompareLatency(const SUnit left, const SUnit right) {
	// Scheduling an instruction that uses a VReg whose postincrement has not yet
	// been scheduled will induce a copy. Model this as an extra cycle of latency.
	int LHeight = (int)left->getHeight();
	int RHeight = (int)right->getHeight();

	// If either node is scheduling for latency, sort them by height/depth
	// and latency.

	// If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
	// is enabled, grouping instructions by cycle, then its height is already
	// covered so only its depth matters. We also reach this point if both stall
	// but have the same height.
	if (LHeight != RHeight)
	return LHeight > RHeight ? 1 : -1;

	int LDepth = left->getDepth();
	int RDepth = right->getDepth();
	if (LDepth != RDepth) {
	LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
	<< ") depth " << LDepth << " vs SU (" << right->NodeNum
	<< ") depth " << RDepth << "\n");
	return LDepth < RDepth ? 1 : -1;
	}
	if (left->Latency != right->Latency)
	return left->Latency > right->Latency ? 1 : -1;

	return 0;
	}

	const SUnit GCNILPScheduler::pickBest(const SUnit left, const SUnit *right)
	{
	// TODO: add register pressure lowering checks

	bool const DisableSchedCriticalPath = false;
	int MaxReorderWindow = 6;
	if (!DisableSchedCriticalPath) {
	int spread = (int)left->getDepth() - (int)right->getDepth();
	if (std::abs(spread) > MaxReorderWindow) {
	LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
	<< left->getDepth() << " != SU(" << right->NodeNum
	<< "): " << right->getDepth() << "\n");
	return left->getDepth() < right->getDepth() ? right : left;
	}
	}

	bool const DisableSchedHeight = false;
	if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
	int spread = (int)left->getHeight() - (int)right->getHeight();
	if (std::abs(spread) > MaxReorderWindow)
	return left->getHeight() > right->getHeight() ? right : left;
	}

	// Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
	unsigned LPriority = getNodePriority(left);
	unsigned RPriority = getNodePriority(right);

	if (LPriority != RPriority)
	return LPriority > RPriority ? right : left;

	// Try schedule def + use closer when Sethi-Ullman numbers are the same.
	// e.g.
	// t1 = op t2, c1
	// t3 = op t4, c2
	//
	// and the following instructions are both ready.
	// t2 = op c3
	// t4 = op c4
	//
	// Then schedule t2 = op first.
	// i.e.
	// t4 = op c4
	// t2 = op c3
	// t1 = op t2, c1
	// t3 = op t4, c2
	//
	// This creates more short live intervals.
	unsigned LDist = closestSucc(left);
	unsigned RDist = closestSucc(right);
	if (LDist != RDist)
	return LDist < RDist ? right : left;

	// How many registers becomes live when the node is scheduled.
	unsigned LScratch = calcMaxScratches(left);
	unsigned RScratch = calcMaxScratches(right);
	if (LScratch != RScratch)
	return LScratch > RScratch ? right : left;

	bool const DisableSchedCycles = false;
	if (!DisableSchedCycles) {
	int result = BUCompareLatency(left, right);
	if (result != 0)
	return result > 0 ? right : left;
	return left;
	}
	else {
	if (left->getHeight() != right->getHeight())
	return (left->getHeight() > right->getHeight()) ? right : left;

	if (left->getDepth() != right->getDepth())
	return (left->getDepth() < right->getDepth()) ? right : left;
	}

	assert(left->NodeQueueId && right->NodeQueueId &&
	"NodeQueueId cannot be zero");
	return (left->NodeQueueId > right->NodeQueueId) ? right : left;
	}

	GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
	if (AvailQueue.empty())
	return nullptr;
	auto Best = AvailQueue.begin();
	for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
	auto NewBestSU = pickBest(Best->SU, I->SU);
	if (NewBestSU != Best->SU) {
	assert(NewBestSU == I->SU);
	Best = I;
	}
	}
	return &*Best;
	}

	void GCNILPScheduler::releasePending() {
	// Check to see if any of the pending instructions are ready to issue. If
	// so, add them to the available queue.
	for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
	auto &C = *I++;
	if (C.SU->getHeight() <= CurCycle) {
	PendingQueue.remove(C);
	AvailQueue.push_back(C);
	C.SU->NodeQueueId = CurQueueId++;
	}
	}
	}

	/// Move the scheduler state forward by the specified number of Cycles.
	void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
	if (NextCycle <= CurCycle)
	return;
	CurCycle = NextCycle;
	releasePending();
	}

	void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
	for (const auto &PredEdge : SU->Preds) {
	auto PredSU = PredEdge.getSUnit();
	if (PredEdge.isWeak())
	continue;
	assert(PredSU->isBoundaryNode() \|\| PredSU->NumSuccsLeft > 0);

	PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());

	if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
	PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
	}
	}

	std::vector<const SUnit*>
	GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
	const ScheduleDAG &DAG) {
	auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;

	std::vector<SUnit> SUSavedCopy;
	SUSavedCopy.resize(SUnits.size());

	// we cannot save only those fields we touch: some of them are private
	// so save units verbatim: this assumes SUnit should have value semantics
	for (const SUnit &SU : SUnits)
	SUSavedCopy[SU.NodeNum] = SU;

	SUNumbers.assign(SUnits.size(), 0);
	for (const SUnit &SU : SUnits)
	CalcNodeSethiUllmanNumber(&SU, SUNumbers);

	for (auto SU : BotRoots) {
	AvailQueue.push_back(
	new (Alloc.Allocate()) Candidate(const_cast<SUnit>(SU)));
	}
	releasePredecessors(&DAG.ExitSU);

	std::vector<const SUnit*> Schedule;
	Schedule.reserve(SUnits.size());
	while (true) {
	if (AvailQueue.empty() && !PendingQueue.empty()) {
	auto EarliestSU = std::min_element(
	PendingQueue.begin(), PendingQueue.end(),
	[=](const Candidate& C1, const Candidate& C2) {
	return C1.SU->getHeight() < C2.SU->getHeight();
	})->SU;
	advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
	}
	if (AvailQueue.empty())
	break;

	LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
	"Ready queue:";
	for (auto &C
	: AvailQueue) dbgs()
	<< ' ' << C.SU->NodeNum;
	dbgs() << '\n';);

	auto C = pickCandidate();
	assert(C);
	AvailQueue.remove(*C);
	auto SU = C->SU;
	LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));

	advanceToCycle(SU->getHeight());

	releasePredecessors(SU);
	Schedule.push_back(SU);
	SU->isScheduled = true;
	}
	assert(SUnits.size() == Schedule.size());

	std::reverse(Schedule.begin(), Schedule.end());

	// restore units
	for (auto &SU : SUnits)
	SU = SUSavedCopy[SU.NodeNum];

	return Schedule;
	}

	namespace llvm {
	std::vector<const SUnit> makeGCNILPScheduler(ArrayRef<const SUnit> BotRoots,
	const ScheduleDAG &DAG) {
	GCNILPScheduler S;
	return S.schedule(BotRoots, DAG);
	}
	}