//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "HexagonMachineScheduler.h"

#include <queue>

using namespace llvm;

static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
                                  cl::desc("Force top-down list scheduling"));
static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
                                   cl::desc("Force bottom-up list scheduling"));
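// Note: these VLIW-specific flags parallel the generic -misched-topdown and
// -misched-bottomup options; initialize() asserts that at most one is set.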

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for (; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
                                      MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool VLIWMachineScheduler::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
                                       MachineBasicBlock::iterator begin,
                                       MachineBasicBlock::iterator end,
                                       unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Set up the register pressure trackers for the top-scheduled and
// bottom-scheduled regions.
void VLIWMachineScheduler::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");

  // Reset resource state.
  TopResourceModel->resetPacketState();
  TopResourceModel->resetDFA();
  BotResourceModel->resetPacketState();
  BotResourceModel->resetDFA();
  TotalPackets = 0;
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void VLIWMachineScheduler::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// Check if scheduling of this SU is possible
/// in the current packet.
/// It is _not_ precise (stateful); it is more like
/// another heuristic. Many corner cases are handled
/// empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(SU->getInstr()))
      return false;
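    // Note: control falls through here; the pseudo opcodes below bypass the
    // DFA check and are always treated as fitting in the packet.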
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
    break;
  }

  // Now see if there are no other dependencies to instructions already
  // in the packet.
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    if (Packet[i]->Succs.size() == 0)
      continue;
    for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
         E = Packet[i]->Succs.end(); I != E; ++I) {
      // Since we do not add pseudos to packets, might as well
      // ignore order dependencies.
      if (I->isCtrl())
        continue;

      if (I->getSUnit() == SU)
        return false;
    }
  }
  return true;
}

/// Keep track of available resources.
void VLIWResourceModel::reserveResources(SUnit *SU) {
  // If this SU does not fit in the packet
  // start a new one.
  if (!isResourceAvailable(SU)) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    DEBUG(dbgs() << "\t[" << i << "] SU(");
    DEBUG(dbgs() << Packet[i]->NodeNum << ")\n");
  }
#endif

  // If packet is now full, reset the state so in the next cycle
  // we start fresh.
  if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
    ResourcesModel->clearResources();
    Packet.clear();
    TotalPackets++;
  }
}

// Release all DAG roots for scheduling.
void VLIWMachineScheduler::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    // A SUnit is ready to top schedule if it has no predecessors.
    if (I->Preds.empty())
      SchedImpl->releaseTopNode(&(*I));
    // A SUnit is ready to bottom schedule if it has no successors.
    if (I->Succs.empty())
      BotRoots.push_back(&(*I));
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  DEBUG(dbgs()
        << "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
        << " " << BB->getName()
        << " in_func " << BB->getParent()->getFunction()->getName()
        << " at loop depth " << MLI->getLoopDepth(BB)
        << " \n");

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  SchedImpl->initialize(this);

  // Release edges from the special Entry node or to the special Exit node.
  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  // Release all DAG roots for scheduling.
  releaseRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
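  // The unscheduled zone is [CurrentTop, CurrentBottom); it shrinks from both
  // ends as nodes are scheduled top-down or bottom-up.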
  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    if (!checkSchedLimit())
      break;

    // Move the instruction to its new location in the instruction stream.
    MachineInstr *MI = SU->getInstr();

    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else {
        moveInstruction(MI, CurrentTop);
        TopRPTracker.setPos(MI);
      }

      // Update top scheduled pressure.
      TopRPTracker.advance();
      assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
      updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);

      // Update DFA state.
      TopResourceModel->reserveResources(SU);

      // Release dependent instructions for scheduling.
      releaseSuccessors(SU);
    }
    else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
        priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI) {
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
          TopRPTracker.setPos(CurrentTop);
        }
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
      // Update bottom scheduled pressure.
      BotRPTracker.recede();
      assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
      updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);

      // Update DFA state.
      BotResourceModel->reserveResources(SU);

      // Release dependent instructions for scheduling.
      releasePredecessors(SU);
    }
    SU->isScheduled = true;
    SchedImpl->schedNode(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
  DEBUG(dbgs() << "Final schedule has " << TopResourceModel->getTotalPackets() +
        BotResourceModel->getTotalPackets() << " packets.\n");

  placeDebugValues();
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void VLIWMachineScheduler::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

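  // Walk the recorded (DBG_VALUE, preceding instruction) pairs in reverse and
  // splice each debug value back in directly after the instruction it
  // originally followed.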
  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
  DAG = dag;
  TRI = DAG->TRI;
  Top.DAG = dag;
  Bot.DAG = dag;

  // Initialize the HazardRecognizers.
  const TargetMachine &TM = DAG->MF.getTarget();
  const InstrItineraryData *Itin = TM.getInstrItineraryData();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

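  // TopReadyCycle is the earliest cycle at which every predecessor has
  // produced its result; take the maximum over all incoming edges.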
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
    return true;

  return false;
}

void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                         unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = DAG->getIssueWidth();
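  // Carry any micro-ops issued beyond this cycle's width over into the next
  // cycle; otherwise the new cycle starts with an empty issue count.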
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
               << CurrCycle << '\n');
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += DAG->getNumMicroOps(SU->getInstr());
  if (IssueCount >= DAG->getIssueWidth()) {
    DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
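    // The queue just shrank, so back up the index and the bound to re-examine
    // the element that moved into this slot.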
    --i; --e;
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple instructions
/// are ready, return NULL.
SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

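  // If nothing is available yet, keep advancing the cycle and releasing newly
  // ready nodes until something can issue; the assert below guards against
  // spinning forever on a permanent hazard.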
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q,
                                             SUnit *SU, PressureElement P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  SU->dump(DAG);
}
#endif

// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityThree = 50;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;

/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  // Forced priority is high.
  if (SU->isScheduleHigh)
    ResCount += PriorityOne;

  // Critical path first.
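  // Height (top-down) and depth (bottom-up) measure the longest remaining
  // chain of dependences through this node, so nodes on the critical path
  // score higher.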
  if (Q.getID() == TopQID)
    ResCount += (SU->getHeight() * ScaleTwo);
  else
    ResCount += (SU->getDepth() * ScaleTwo);

  // If resources are available for it, multiply the
  // chance of scheduling.
  if (DAG->getTopResourceModel()->isResourceAvailable(SU))
    ResCount <<= FactorOne;

  // Factor in reg pressure as a heuristic.
  ResCount -= (Delta.Excess.UnitIncrease * PriorityThree);
  ResCount -= (Delta.CriticalMax.UnitIncrease * PriorityThree);

  DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");

  return ResCount;
}

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
                  SchedCandidate &Candidate) {
  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      DEBUG(traceCandidate("CCAND", Q, *I));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult = pickNodeFromQueue(Bot.Available,
                                           DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult = pickNodeFromQueue(Top.Available,
                                           DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (BotResult == SingleMax) {
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  IsTopNode = false;
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
        pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
        pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
               << " Scheduling Instruction in cycle "
               << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}