Valery Pykhtin | f2fe972 | 2017-11-20 14:35:53 +0000 | [diff] [blame] | 1 | //===---------------------------- GCNILPSched.cpp - -----------------------===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame^] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Valery Pykhtin | f2fe972 | 2017-11-20 14:35:53 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/CodeGen/ScheduleDAG.h" |
| 14 | |
| 15 | using namespace llvm; |
| 16 | |
| 17 | #define DEBUG_TYPE "machine-scheduler" |
| 18 | |
| 19 | namespace { |
| 20 | |
| 21 | class GCNILPScheduler { |
| 22 | struct Candidate : ilist_node<Candidate> { |
| 23 | SUnit *SU; |
| 24 | |
| 25 | Candidate(SUnit *SU_) |
| 26 | : SU(SU_) {} |
| 27 | }; |
| 28 | |
| 29 | SpecificBumpPtrAllocator<Candidate> Alloc; |
| 30 | typedef simple_ilist<Candidate> Queue; |
| 31 | Queue PendingQueue; |
| 32 | Queue AvailQueue; |
| 33 | unsigned CurQueueId = 0; |
| 34 | |
| 35 | std::vector<unsigned> SUNumbers; |
| 36 | |
| 37 | /// CurCycle - The current scheduler state corresponds to this cycle. |
| 38 | unsigned CurCycle = 0; |
| 39 | |
| 40 | unsigned getNodePriority(const SUnit *SU) const; |
| 41 | |
| 42 | const SUnit *pickBest(const SUnit *left, const SUnit *right); |
| 43 | Candidate* pickCandidate(); |
| 44 | |
| 45 | void releasePending(); |
| 46 | void advanceToCycle(unsigned NextCycle); |
| 47 | void releasePredecessors(const SUnit* SU); |
| 48 | |
| 49 | public: |
| 50 | std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots, |
| 51 | const ScheduleDAG &DAG); |
| 52 | }; |
| 53 | } // namespace |
| 54 | |
| 55 | /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. |
| 56 | /// Smaller number is the higher priority. |
| 57 | static unsigned |
| 58 | CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { |
| 59 | unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; |
| 60 | if (SethiUllmanNumber != 0) |
| 61 | return SethiUllmanNumber; |
| 62 | |
| 63 | unsigned Extra = 0; |
| 64 | for (const SDep &Pred : SU->Preds) { |
| 65 | if (Pred.isCtrl()) continue; // ignore chain preds |
| 66 | SUnit *PredSU = Pred.getSUnit(); |
| 67 | unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); |
| 68 | if (PredSethiUllman > SethiUllmanNumber) { |
| 69 | SethiUllmanNumber = PredSethiUllman; |
| 70 | Extra = 0; |
| 71 | } |
| 72 | else if (PredSethiUllman == SethiUllmanNumber) |
| 73 | ++Extra; |
| 74 | } |
| 75 | |
| 76 | SethiUllmanNumber += Extra; |
| 77 | |
| 78 | if (SethiUllmanNumber == 0) |
| 79 | SethiUllmanNumber = 1; |
| 80 | |
| 81 | return SethiUllmanNumber; |
| 82 | } |
| 83 | |
| 84 | // Lower priority means schedule further down. For bottom-up scheduling, lower |
| 85 | // priority SUs are scheduled before higher priority SUs. |
| 86 | unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const { |
| 87 | assert(SU->NodeNum < SUNumbers.size()); |
| 88 | if (SU->NumSuccs == 0 && SU->NumPreds != 0) |
| 89 | // If SU does not have a register use, i.e. it doesn't produce a value |
| 90 | // that would be consumed (e.g. store), then it terminates a chain of |
| 91 | // computation. Give it a large SethiUllman number so it will be |
| 92 | // scheduled right before its predecessors that it doesn't lengthen |
| 93 | // their live ranges. |
| 94 | return 0xffff; |
| 95 | |
| 96 | if (SU->NumPreds == 0 && SU->NumSuccs != 0) |
| 97 | // If SU does not have a register def, schedule it close to its uses |
| 98 | // because it does not lengthen any live ranges. |
| 99 | return 0; |
| 100 | |
| 101 | return SUNumbers[SU->NodeNum]; |
| 102 | } |
| 103 | |
| 104 | /// closestSucc - Returns the scheduled cycle of the successor which is |
| 105 | /// closest to the current cycle. |
| 106 | static unsigned closestSucc(const SUnit *SU) { |
| 107 | unsigned MaxHeight = 0; |
| 108 | for (const SDep &Succ : SU->Succs) { |
| 109 | if (Succ.isCtrl()) continue; // ignore chain succs |
| 110 | unsigned Height = Succ.getSUnit()->getHeight(); |
| 111 | // If there are bunch of CopyToRegs stacked up, they should be considered |
| 112 | // to be at the same position. |
| 113 | if (Height > MaxHeight) |
| 114 | MaxHeight = Height; |
| 115 | } |
| 116 | return MaxHeight; |
| 117 | } |
| 118 | |
| 119 | /// calcMaxScratches - Returns an cost estimate of the worse case requirement |
| 120 | /// for scratch registers, i.e. number of data dependencies. |
| 121 | static unsigned calcMaxScratches(const SUnit *SU) { |
| 122 | unsigned Scratches = 0; |
| 123 | for (const SDep &Pred : SU->Preds) { |
| 124 | if (Pred.isCtrl()) continue; // ignore chain preds |
| 125 | Scratches++; |
| 126 | } |
| 127 | return Scratches; |
| 128 | } |
| 129 | |
| 130 | // Return -1 if left has higher priority, 1 if right has higher priority. |
| 131 | // Return 0 if latency-based priority is equivalent. |
| 132 | static int BUCompareLatency(const SUnit *left, const SUnit *right) { |
| 133 | // Scheduling an instruction that uses a VReg whose postincrement has not yet |
| 134 | // been scheduled will induce a copy. Model this as an extra cycle of latency. |
| 135 | int LHeight = (int)left->getHeight(); |
| 136 | int RHeight = (int)right->getHeight(); |
| 137 | |
| 138 | // If either node is scheduling for latency, sort them by height/depth |
| 139 | // and latency. |
| 140 | |
| 141 | // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer |
| 142 | // is enabled, grouping instructions by cycle, then its height is already |
| 143 | // covered so only its depth matters. We also reach this point if both stall |
| 144 | // but have the same height. |
| 145 | if (LHeight != RHeight) |
| 146 | return LHeight > RHeight ? 1 : -1; |
| 147 | |
| 148 | int LDepth = left->getDepth(); |
| 149 | int RDepth = right->getDepth(); |
| 150 | if (LDepth != RDepth) { |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 151 | LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum |
| 152 | << ") depth " << LDepth << " vs SU (" << right->NodeNum |
| 153 | << ") depth " << RDepth << "\n"); |
Valery Pykhtin | f2fe972 | 2017-11-20 14:35:53 +0000 | [diff] [blame] | 154 | return LDepth < RDepth ? 1 : -1; |
| 155 | } |
| 156 | if (left->Latency != right->Latency) |
| 157 | return left->Latency > right->Latency ? 1 : -1; |
| 158 | |
| 159 | return 0; |
| 160 | } |
| 161 | |
| 162 | const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right) |
| 163 | { |
| 164 | // TODO: add register pressure lowering checks |
| 165 | |
| 166 | bool const DisableSchedCriticalPath = false; |
| 167 | int MaxReorderWindow = 6; |
| 168 | if (!DisableSchedCriticalPath) { |
| 169 | int spread = (int)left->getDepth() - (int)right->getDepth(); |
| 170 | if (std::abs(spread) > MaxReorderWindow) { |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 171 | LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " |
| 172 | << left->getDepth() << " != SU(" << right->NodeNum |
| 173 | << "): " << right->getDepth() << "\n"); |
Valery Pykhtin | f2fe972 | 2017-11-20 14:35:53 +0000 | [diff] [blame] | 174 | return left->getDepth() < right->getDepth() ? right : left; |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | bool const DisableSchedHeight = false; |
| 179 | if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { |
| 180 | int spread = (int)left->getHeight() - (int)right->getHeight(); |
| 181 | if (std::abs(spread) > MaxReorderWindow) |
| 182 | return left->getHeight() > right->getHeight() ? right : left; |
| 183 | } |
| 184 | |
| 185 | // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. |
| 186 | unsigned LPriority = getNodePriority(left); |
| 187 | unsigned RPriority = getNodePriority(right); |
| 188 | |
| 189 | if (LPriority != RPriority) |
| 190 | return LPriority > RPriority ? right : left; |
| 191 | |
| 192 | // Try schedule def + use closer when Sethi-Ullman numbers are the same. |
| 193 | // e.g. |
| 194 | // t1 = op t2, c1 |
| 195 | // t3 = op t4, c2 |
| 196 | // |
| 197 | // and the following instructions are both ready. |
| 198 | // t2 = op c3 |
| 199 | // t4 = op c4 |
| 200 | // |
| 201 | // Then schedule t2 = op first. |
| 202 | // i.e. |
| 203 | // t4 = op c4 |
| 204 | // t2 = op c3 |
| 205 | // t1 = op t2, c1 |
| 206 | // t3 = op t4, c2 |
| 207 | // |
| 208 | // This creates more short live intervals. |
| 209 | unsigned LDist = closestSucc(left); |
| 210 | unsigned RDist = closestSucc(right); |
| 211 | if (LDist != RDist) |
| 212 | return LDist < RDist ? right : left; |
| 213 | |
| 214 | // How many registers becomes live when the node is scheduled. |
| 215 | unsigned LScratch = calcMaxScratches(left); |
| 216 | unsigned RScratch = calcMaxScratches(right); |
| 217 | if (LScratch != RScratch) |
| 218 | return LScratch > RScratch ? right : left; |
| 219 | |
| 220 | bool const DisableSchedCycles = false; |
| 221 | if (!DisableSchedCycles) { |
| 222 | int result = BUCompareLatency(left, right); |
| 223 | if (result != 0) |
| 224 | return result > 0 ? right : left; |
| 225 | return left; |
| 226 | } |
| 227 | else { |
| 228 | if (left->getHeight() != right->getHeight()) |
| 229 | return (left->getHeight() > right->getHeight()) ? right : left; |
| 230 | |
| 231 | if (left->getDepth() != right->getDepth()) |
| 232 | return (left->getDepth() < right->getDepth()) ? right : left; |
| 233 | } |
| 234 | |
| 235 | assert(left->NodeQueueId && right->NodeQueueId && |
| 236 | "NodeQueueId cannot be zero"); |
| 237 | return (left->NodeQueueId > right->NodeQueueId) ? right : left; |
| 238 | } |
| 239 | |
| 240 | GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() { |
| 241 | if (AvailQueue.empty()) |
| 242 | return nullptr; |
| 243 | auto Best = AvailQueue.begin(); |
| 244 | for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) { |
| 245 | auto NewBestSU = pickBest(Best->SU, I->SU); |
| 246 | if (NewBestSU != Best->SU) { |
| 247 | assert(NewBestSU == I->SU); |
| 248 | Best = I; |
| 249 | } |
| 250 | } |
| 251 | return &*Best; |
| 252 | } |
| 253 | |
| 254 | void GCNILPScheduler::releasePending() { |
| 255 | // Check to see if any of the pending instructions are ready to issue. If |
| 256 | // so, add them to the available queue. |
| 257 | for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) { |
| 258 | auto &C = *I++; |
| 259 | if (C.SU->getHeight() <= CurCycle) { |
| 260 | PendingQueue.remove(C); |
| 261 | AvailQueue.push_back(C); |
| 262 | C.SU->NodeQueueId = CurQueueId++; |
| 263 | } |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | /// Move the scheduler state forward by the specified number of Cycles. |
| 268 | void GCNILPScheduler::advanceToCycle(unsigned NextCycle) { |
| 269 | if (NextCycle <= CurCycle) |
| 270 | return; |
| 271 | CurCycle = NextCycle; |
| 272 | releasePending(); |
| 273 | } |
| 274 | |
| 275 | void GCNILPScheduler::releasePredecessors(const SUnit* SU) { |
| 276 | for (const auto &PredEdge : SU->Preds) { |
| 277 | auto PredSU = PredEdge.getSUnit(); |
| 278 | if (PredEdge.isWeak()) |
| 279 | continue; |
| 280 | assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0); |
| 281 | |
| 282 | PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency()); |
| 283 | |
| 284 | if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0) |
| 285 | PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU)); |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | std::vector<const SUnit*> |
| 290 | GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots, |
| 291 | const ScheduleDAG &DAG) { |
| 292 | auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits; |
| 293 | |
| 294 | std::vector<SUnit> SUSavedCopy; |
| 295 | SUSavedCopy.resize(SUnits.size()); |
| 296 | |
| 297 | // we cannot save only those fields we touch: some of them are private |
| 298 | // so save units verbatim: this assumes SUnit should have value semantics |
| 299 | for (const SUnit &SU : SUnits) |
| 300 | SUSavedCopy[SU.NodeNum] = SU; |
| 301 | |
| 302 | SUNumbers.assign(SUnits.size(), 0); |
| 303 | for (const SUnit &SU : SUnits) |
| 304 | CalcNodeSethiUllmanNumber(&SU, SUNumbers); |
| 305 | |
| 306 | for (auto SU : BotRoots) { |
| 307 | AvailQueue.push_back( |
| 308 | *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU))); |
| 309 | } |
| 310 | releasePredecessors(&DAG.ExitSU); |
| 311 | |
| 312 | std::vector<const SUnit*> Schedule; |
| 313 | Schedule.reserve(SUnits.size()); |
| 314 | while (true) { |
| 315 | if (AvailQueue.empty() && !PendingQueue.empty()) { |
| 316 | auto EarliestSU = std::min_element( |
| 317 | PendingQueue.begin(), PendingQueue.end(), |
| 318 | [=](const Candidate& C1, const Candidate& C2) { |
| 319 | return C1.SU->getHeight() < C2.SU->getHeight(); |
| 320 | })->SU; |
| 321 | advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight())); |
| 322 | } |
| 323 | if (AvailQueue.empty()) |
| 324 | break; |
| 325 | |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 326 | LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n" |
| 327 | "Ready queue:"; |
| 328 | for (auto &C |
| 329 | : AvailQueue) dbgs() |
| 330 | << ' ' << C.SU->NodeNum; |
| 331 | dbgs() << '\n';); |
Valery Pykhtin | f2fe972 | 2017-11-20 14:35:53 +0000 | [diff] [blame] | 332 | |
| 333 | auto C = pickCandidate(); |
| 334 | assert(C); |
| 335 | AvailQueue.remove(*C); |
| 336 | auto SU = C->SU; |
Matthias Braun | 726e12c | 2018-09-19 00:23:35 +0000 | [diff] [blame] | 337 | LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU)); |
Valery Pykhtin | f2fe972 | 2017-11-20 14:35:53 +0000 | [diff] [blame] | 338 | |
| 339 | advanceToCycle(SU->getHeight()); |
| 340 | |
| 341 | releasePredecessors(SU); |
| 342 | Schedule.push_back(SU); |
| 343 | SU->isScheduled = true; |
| 344 | } |
| 345 | assert(SUnits.size() == Schedule.size()); |
| 346 | |
| 347 | std::reverse(Schedule.begin(), Schedule.end()); |
| 348 | |
| 349 | // restore units |
| 350 | for (auto &SU : SUnits) |
| 351 | SU = SUSavedCopy[SU.NodeNum]; |
| 352 | |
| 353 | return Schedule; |
| 354 | } |
| 355 | |
| 356 | namespace llvm { |
| 357 | std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots, |
| 358 | const ScheduleDAG &DAG) { |
| 359 | GCNILPScheduler S; |
| 360 | return S.schedule(BotRoots, DAG); |
| 361 | } |
| 362 | } |