//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "HexagonMachineScheduler.h"

#include <queue>

using namespace llvm;

static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
                                  cl::desc("Force top-down list scheduling"));
static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
                                   cl::desc("Force bottom-up list scheduling"));
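// Note: these VLIW-specific flags parallel the generic -misched-topdown and
// -misched-bottomup options; initialize() asserts that at most one is set.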

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
                                      MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool VLIWMachineScheduler::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
                                       MachineBasicBlock::iterator begin,
                                       MachineBasicBlock::iterator end,
                                       unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Set up the register pressure trackers for the top-scheduled and
// bottom-scheduled regions.
void VLIWMachineScheduler::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");

  // Reset resource state.
  TopResourceModel->resetPacketState();
  TopResourceModel->resetDFA();
  BotResourceModel->resetPacketState();
  BotResourceModel->resetDFA();
  TotalPackets = 0;
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void VLIWMachineScheduler::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// Check if scheduling of this SU is possible
/// in the current packet.
/// It is _not_ precise (stateful); it is more like
/// another heuristic. Many corner cases are handled
/// empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(SU->getInstr()))
      return false;
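    // Note: control falls through here; the pseudo opcodes below bypass the
    // DFA check and are always treated as fitting in the packet.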
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
    break;
  }

  // Now see if there are no other dependencies to instructions already
  // in the packet.
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    if (Packet[i]->Succs.size() == 0)
      continue;
    for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
         E = Packet[i]->Succs.end(); I != E; ++I) {
      // Since we do not add pseudos to packets, might as well
      // ignore order dependencies.
      if (I->isCtrl())
        continue;

      if (I->getSUnit() == SU)
        return false;
    }
  }
  return true;
}

/// Keep track of available resources.
void VLIWResourceModel::reserveResources(SUnit *SU) {
  // If this SU does not fit in the packet
  // start a new one.
  if (!isResourceAvailable(SU)) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    DEBUG(dbgs() << "\t[" << i << "] SU(");
    DEBUG(dbgs() << Packet[i]->NodeNum << ")\n");
  }
#endif

  // If packet is now full, reset the state so in the next cycle
  // we start fresh.
  if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
  }
}

// Release all DAG roots for scheduling.
void VLIWMachineScheduler::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    // A SUnit is ready to top schedule if it has no predecessors.
    if (I->Preds.empty())
      SchedImpl->releaseTopNode(&(*I));
    // A SUnit is ready to bottom schedule if it has no successors.
    if (I->Succs.empty())
      BotRoots.push_back(&(*I));
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  DEBUG(dbgs()
        << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
        << " " << BB->getName()
        << " in_func " << BB->getParent()->getFunction()->getName()
        << " at loop depth " << MLI->getLoopDepth(BB)
        << " \n");

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  SchedImpl->initialize(this);

  // Release edges from the special Entry node or to the special Exit node.
  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  // Release all DAG roots for scheduling.
  releaseRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
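  // The unscheduled zone is [CurrentTop, CurrentBottom); it shrinks from both
  // ends as nodes are scheduled top-down or bottom-up.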
  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    if (!checkSchedLimit())
      break;

    // Move the instruction to its new location in the instruction stream.
    MachineInstr *MI = SU->getInstr();

    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else {
        moveInstruction(MI, CurrentTop);
        TopRPTracker.setPos(MI);
      }

      // Update top scheduled pressure.
      TopRPTracker.advance();
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);

      // Update DFA state.
      TopResourceModel->reserveResources(SU);

      // Release dependent instructions for scheduling.
      releaseSuccessors(SU);
    }
    else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI) {
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
          TopRPTracker.setPos(CurrentTop);
        }
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
      // Update bottom scheduled pressure.
      BotRPTracker.recede();
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);

      // Update DFA state.
      BotResourceModel->reserveResources(SU);

      // Release dependent instructions for scheduling.
      releasePredecessors(SU);
    }
    SU->isScheduled = true;
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  DEBUG(dbgs() << "Final schedule has " << TopResourceModel->getTotalPackets() +
        BotResourceModel->getTotalPackets() << " packets.\n");

  placeDebugValues();
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void VLIWMachineScheduler::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

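  // Walk the recorded (DBG_VALUE, preceding instruction) pairs in reverse and
  // splice each debug value back in directly after the instruction it
  // originally followed.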
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
  DAG = dag;
  TRI = DAG->TRI;
  Top.DAG = dag;
  Bot.DAG = dag;

  // Initialize the HazardRecognizers.
  const TargetMachine &TM = DAG->MF.getTarget();
  const InstrItineraryData *Itin = TM.getInstrItineraryData();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

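  // TopReadyCycle is the earliest cycle at which every predecessor has
  // produced its result; take the maximum over all incoming edges.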
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
    return true;

  return false;
}

void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                         unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = DAG->getIssueWidth();
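  // Carry any micro-ops issued beyond this cycle's width over into the next
  // cycle; otherwise the new cycle starts with an empty issue count.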
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
               << CurrCycle << '\n');
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += DAG->getNumMicroOps(SU->getInstr());
  if (IssueCount >= DAG->getIssueWidth()) {
    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
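    // The queue just shrank, so back up the index and the bound to re-examine
    // the element that moved into this slot.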
    --i; --e;
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

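  // If nothing is available yet, keep advancing the cycle and releasing newly
  // ready nodes until something can issue; the assert below guards against
  // spinning forever on a permanent hazard.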
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q,
                                             SUnit *SU, PressureElement P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  SU->dump(DAG);
}
#endif

// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityThree = 50;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;

/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Critical path first.
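  // Height (top-down) and depth (bottom-up) measure the longest remaining
  // chain of dependences through this node, so nodes on the critical path
  // score higher.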
  if (Q.getID() == TopQID)
    ResCount += (SU->getHeight() * ScaleTwo);
  else
    ResCount += (SU->getDepth() * ScaleTwo);

  // If resources are available for it, multiply the
  // chance of scheduling.
  if (DAG->getTopResourceModel()->isResourceAvailable(SU))
    ResCount <<= FactorOne;

  // Factor in reg pressure as a heuristic.
  ResCount -= (Delta.Excess.UnitIncrease * PriorityThree);
  ResCount -= (Delta.CriticalMax.UnitIncrease * PriorityThree);

  DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");

  return ResCount;
}

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
                  SchedCandidate &Candidate) {
  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      DEBUG(traceCandidate("CCAND", Q, *I));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult = pickNodeFromQueue(Bot.Available,
                                           DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult = pickNodeFromQueue(Top.Available,
                                           DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (BotResult == SingleMax) {
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  IsTopNode = false;
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
               << " Scheduling Instruction in cycle "
               << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}