|  | //===---------------------------- GCNILPSched.cpp - -----------------------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/CodeGen/ScheduleDAG.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "machine-scheduler" | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class GCNILPScheduler { | 
|  | struct Candidate : ilist_node<Candidate> { | 
|  | SUnit *SU; | 
|  |  | 
|  | Candidate(SUnit *SU_) | 
|  | : SU(SU_) {} | 
|  | }; | 
|  |  | 
|  | SpecificBumpPtrAllocator<Candidate> Alloc; | 
|  | typedef simple_ilist<Candidate> Queue; | 
|  | Queue PendingQueue; | 
|  | Queue AvailQueue; | 
|  | unsigned CurQueueId = 0; | 
|  |  | 
|  | std::vector<unsigned> SUNumbers; | 
|  |  | 
|  | /// CurCycle - The current scheduler state corresponds to this cycle. | 
|  | unsigned CurCycle = 0; | 
|  |  | 
|  | unsigned getNodePriority(const SUnit *SU) const; | 
|  |  | 
|  | const SUnit *pickBest(const SUnit *left, const SUnit *right); | 
|  | Candidate* pickCandidate(); | 
|  |  | 
|  | void releasePending(); | 
|  | void advanceToCycle(unsigned NextCycle); | 
|  | void releasePredecessors(const SUnit* SU); | 
|  |  | 
|  | public: | 
|  | std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, | 
|  | const ScheduleDAG &DAG); | 
|  | }; | 
|  | } // namespace | 
|  |  | 
|  | /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. | 
|  | /// Smaller number is the higher priority. | 
|  | static unsigned | 
|  | CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { | 
|  | unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; | 
|  | if (SethiUllmanNumber != 0) | 
|  | return SethiUllmanNumber; | 
|  |  | 
|  | unsigned Extra = 0; | 
|  | for (const SDep &Pred : SU->Preds) { | 
|  | if (Pred.isCtrl()) continue;  // ignore chain preds | 
|  | SUnit *PredSU = Pred.getSUnit(); | 
|  | unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); | 
|  | if (PredSethiUllman > SethiUllmanNumber) { | 
|  | SethiUllmanNumber = PredSethiUllman; | 
|  | Extra = 0; | 
|  | } | 
|  | else if (PredSethiUllman == SethiUllmanNumber) | 
|  | ++Extra; | 
|  | } | 
|  |  | 
|  | SethiUllmanNumber += Extra; | 
|  |  | 
|  | if (SethiUllmanNumber == 0) | 
|  | SethiUllmanNumber = 1; | 
|  |  | 
|  | return SethiUllmanNumber; | 
|  | } | 
|  |  | 
|  | // Lower priority means schedule further down. For bottom-up scheduling, lower | 
|  | // priority SUs are scheduled before higher priority SUs. | 
|  | unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const { | 
|  | assert(SU->NodeNum < SUNumbers.size()); | 
|  | if (SU->NumSuccs == 0 && SU->NumPreds != 0) | 
|  | // If SU does not have a register use, i.e. it doesn't produce a value | 
|  | // that would be consumed (e.g. store), then it terminates a chain of | 
|  | // computation.  Give it a large SethiUllman number so it will be | 
|  | // scheduled right before its predecessors that it doesn't lengthen | 
|  | // their live ranges. | 
|  | return 0xffff; | 
|  |  | 
|  | if (SU->NumPreds == 0 && SU->NumSuccs != 0) | 
|  | // If SU does not have a register def, schedule it close to its uses | 
|  | // because it does not lengthen any live ranges. | 
|  | return 0; | 
|  |  | 
|  | return SUNumbers[SU->NodeNum]; | 
|  | } | 
|  |  | 
|  | /// closestSucc - Returns the scheduled cycle of the successor which is | 
|  | /// closest to the current cycle. | 
|  | static unsigned closestSucc(const SUnit *SU) { | 
|  | unsigned MaxHeight = 0; | 
|  | for (const SDep &Succ : SU->Succs) { | 
|  | if (Succ.isCtrl()) continue;  // ignore chain succs | 
|  | unsigned Height = Succ.getSUnit()->getHeight(); | 
|  | // If there are bunch of CopyToRegs stacked up, they should be considered | 
|  | // to be at the same position. | 
|  | if (Height > MaxHeight) | 
|  | MaxHeight = Height; | 
|  | } | 
|  | return MaxHeight; | 
|  | } | 
|  |  | 
|  | /// calcMaxScratches - Returns an cost estimate of the worse case requirement | 
|  | /// for scratch registers, i.e. number of data dependencies. | 
|  | static unsigned calcMaxScratches(const SUnit *SU) { | 
|  | unsigned Scratches = 0; | 
|  | for (const SDep &Pred : SU->Preds) { | 
|  | if (Pred.isCtrl()) continue;  // ignore chain preds | 
|  | Scratches++; | 
|  | } | 
|  | return Scratches; | 
|  | } | 
|  |  | 
|  | // Return -1 if left has higher priority, 1 if right has higher priority. | 
|  | // Return 0 if latency-based priority is equivalent. | 
|  | static int BUCompareLatency(const SUnit *left, const SUnit *right) { | 
|  | // Scheduling an instruction that uses a VReg whose postincrement has not yet | 
|  | // been scheduled will induce a copy. Model this as an extra cycle of latency. | 
|  | int LHeight = (int)left->getHeight(); | 
|  | int RHeight = (int)right->getHeight(); | 
|  |  | 
|  | // If either node is scheduling for latency, sort them by height/depth | 
|  | // and latency. | 
|  |  | 
|  | // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer | 
|  | // is enabled, grouping instructions by cycle, then its height is already | 
|  | // covered so only its depth matters. We also reach this point if both stall | 
|  | // but have the same height. | 
|  | if (LHeight != RHeight) | 
|  | return LHeight > RHeight ? 1 : -1; | 
|  |  | 
|  | int LDepth = left->getDepth(); | 
|  | int RDepth = right->getDepth(); | 
|  | if (LDepth != RDepth) { | 
|  | LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum | 
|  | << ") depth " << LDepth << " vs SU (" << right->NodeNum | 
|  | << ") depth " << RDepth << "\n"); | 
|  | return LDepth < RDepth ? 1 : -1; | 
|  | } | 
|  | if (left->Latency != right->Latency) | 
|  | return left->Latency > right->Latency ? 1 : -1; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) | 
|  | { | 
|  | // TODO: add register pressure lowering checks | 
|  |  | 
|  | bool const DisableSchedCriticalPath = false; | 
|  | int MaxReorderWindow = 6; | 
|  | if (!DisableSchedCriticalPath) { | 
|  | int spread = (int)left->getDepth() - (int)right->getDepth(); | 
|  | if (std::abs(spread) > MaxReorderWindow) { | 
|  | LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " | 
|  | << left->getDepth() << " != SU(" << right->NodeNum | 
|  | << "): " << right->getDepth() << "\n"); | 
|  | return left->getDepth() < right->getDepth() ? right : left; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool const DisableSchedHeight = false; | 
|  | if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { | 
|  | int spread = (int)left->getHeight() - (int)right->getHeight(); | 
|  | if (std::abs(spread) > MaxReorderWindow) | 
|  | return left->getHeight() > right->getHeight() ? right : left; | 
|  | } | 
|  |  | 
|  | // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. | 
|  | unsigned LPriority = getNodePriority(left); | 
|  | unsigned RPriority = getNodePriority(right); | 
|  |  | 
|  | if (LPriority != RPriority) | 
|  | return LPriority > RPriority ? right : left; | 
|  |  | 
|  | // Try schedule def + use closer when Sethi-Ullman numbers are the same. | 
|  | // e.g. | 
|  | // t1 = op t2, c1 | 
|  | // t3 = op t4, c2 | 
|  | // | 
|  | // and the following instructions are both ready. | 
|  | // t2 = op c3 | 
|  | // t4 = op c4 | 
|  | // | 
|  | // Then schedule t2 = op first. | 
|  | // i.e. | 
|  | // t4 = op c4 | 
|  | // t2 = op c3 | 
|  | // t1 = op t2, c1 | 
|  | // t3 = op t4, c2 | 
|  | // | 
|  | // This creates more short live intervals. | 
|  | unsigned LDist = closestSucc(left); | 
|  | unsigned RDist = closestSucc(right); | 
|  | if (LDist != RDist) | 
|  | return LDist < RDist ? right : left; | 
|  |  | 
|  | // How many registers becomes live when the node is scheduled. | 
|  | unsigned LScratch = calcMaxScratches(left); | 
|  | unsigned RScratch = calcMaxScratches(right); | 
|  | if (LScratch != RScratch) | 
|  | return LScratch > RScratch ? right : left; | 
|  |  | 
|  | bool const DisableSchedCycles = false; | 
|  | if (!DisableSchedCycles) { | 
|  | int result = BUCompareLatency(left, right); | 
|  | if (result != 0) | 
|  | return result > 0 ? right : left; | 
|  | return left; | 
|  | } | 
|  | else { | 
|  | if (left->getHeight() != right->getHeight()) | 
|  | return (left->getHeight() > right->getHeight()) ? right : left; | 
|  |  | 
|  | if (left->getDepth() != right->getDepth()) | 
|  | return (left->getDepth() < right->getDepth()) ? right : left; | 
|  | } | 
|  |  | 
|  | assert(left->NodeQueueId && right->NodeQueueId && | 
|  | "NodeQueueId cannot be zero"); | 
|  | return (left->NodeQueueId > right->NodeQueueId) ? right : left; | 
|  | } | 
|  |  | 
|  | GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() { | 
|  | if (AvailQueue.empty()) | 
|  | return nullptr; | 
|  | auto Best = AvailQueue.begin(); | 
|  | for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) { | 
|  | auto NewBestSU = pickBest(Best->SU, I->SU); | 
|  | if (NewBestSU != Best->SU) { | 
|  | assert(NewBestSU == I->SU); | 
|  | Best = I; | 
|  | } | 
|  | } | 
|  | return &*Best; | 
|  | } | 
|  |  | 
|  | void GCNILPScheduler::releasePending() { | 
|  | // Check to see if any of the pending instructions are ready to issue.  If | 
|  | // so, add them to the available queue. | 
|  | for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) { | 
|  | auto &C = *I++; | 
|  | if (C.SU->getHeight() <= CurCycle) { | 
|  | PendingQueue.remove(C); | 
|  | AvailQueue.push_back(C); | 
|  | C.SU->NodeQueueId = CurQueueId++; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /// Move the scheduler state forward by the specified number of Cycles. | 
|  | void GCNILPScheduler::advanceToCycle(unsigned NextCycle) { | 
|  | if (NextCycle <= CurCycle) | 
|  | return; | 
|  | CurCycle = NextCycle; | 
|  | releasePending(); | 
|  | } | 
|  |  | 
|  | void GCNILPScheduler::releasePredecessors(const SUnit* SU) { | 
|  | for (const auto &PredEdge : SU->Preds) { | 
|  | auto PredSU = PredEdge.getSUnit(); | 
|  | if (PredEdge.isWeak()) | 
|  | continue; | 
|  | assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0); | 
|  |  | 
|  | PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency()); | 
|  |  | 
|  | if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0) | 
|  | PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU)); | 
|  | } | 
|  | } | 
|  |  | 
|  | std::vector<const SUnit*> | 
|  | GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots, | 
|  | const ScheduleDAG &DAG) { | 
|  | auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits; | 
|  |  | 
|  | std::vector<SUnit> SUSavedCopy; | 
|  | SUSavedCopy.resize(SUnits.size()); | 
|  |  | 
|  | // we cannot save only those fields we touch: some of them are private | 
|  | // so save units verbatim: this assumes SUnit should have value semantics | 
|  | for (const SUnit &SU : SUnits) | 
|  | SUSavedCopy[SU.NodeNum] = SU; | 
|  |  | 
|  | SUNumbers.assign(SUnits.size(), 0); | 
|  | for (const SUnit &SU : SUnits) | 
|  | CalcNodeSethiUllmanNumber(&SU, SUNumbers); | 
|  |  | 
|  | for (auto SU : BotRoots) { | 
|  | AvailQueue.push_back( | 
|  | *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU))); | 
|  | } | 
|  | releasePredecessors(&DAG.ExitSU); | 
|  |  | 
|  | std::vector<const SUnit*> Schedule; | 
|  | Schedule.reserve(SUnits.size()); | 
|  | while (true) { | 
|  | if (AvailQueue.empty() && !PendingQueue.empty()) { | 
|  | auto EarliestSU = std::min_element( | 
|  | PendingQueue.begin(), PendingQueue.end(), | 
|  | [=](const Candidate& C1, const Candidate& C2) { | 
|  | return C1.SU->getHeight() < C2.SU->getHeight(); | 
|  | })->SU; | 
|  | advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight())); | 
|  | } | 
|  | if (AvailQueue.empty()) | 
|  | break; | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n" | 
|  | "Ready queue:"; | 
|  | for (auto &C | 
|  | : AvailQueue) dbgs() | 
|  | << ' ' << C.SU->NodeNum; | 
|  | dbgs() << '\n';); | 
|  |  | 
|  | auto C = pickCandidate(); | 
|  | assert(C); | 
|  | AvailQueue.remove(*C); | 
|  | auto SU = C->SU; | 
|  | LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); | 
|  |  | 
|  | advanceToCycle(SU->getHeight()); | 
|  |  | 
|  | releasePredecessors(SU); | 
|  | Schedule.push_back(SU); | 
|  | SU->isScheduled = true; | 
|  | } | 
|  | assert(SUnits.size() == Schedule.size()); | 
|  |  | 
|  | std::reverse(Schedule.begin(), Schedule.end()); | 
|  |  | 
|  | // restore units | 
|  | for (auto &SU : SUnits) | 
|  | SU = SUSavedCopy[SU.NodeNum]; | 
|  |  | 
|  | return Schedule; | 
|  | } | 
|  |  | 
|  | namespace llvm { | 
|  | std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots, | 
|  | const ScheduleDAG &DAG) { | 
|  | GCNILPScheduler S; | 
|  | return S.schedule(BotRoots, DAG); | 
|  | } | 
|  | } |