blob: 6d4ee217cf199d501f23d701f1a58bdd1b7804af [file] [log] [blame]
Tom Stellard0d23ebe2016-08-29 19:42:52 +00001//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// This contains a MachineSchedStrategy implementation for maximizing wave
12/// occupancy on GCN hardware.
13//===----------------------------------------------------------------------===//
14
15#include "GCNSchedStrategy.h"
16#include "AMDGPUSubtarget.h"
17#include "SIInstrInfo.h"
18#include "SIMachineFunctionInfo.h"
19#include "SIRegisterInfo.h"
20#include "llvm/CodeGen/RegisterClassInfo.h"
21
22#define DEBUG_TYPE "misched"
23
24using namespace llvm;
25
26GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
27 const MachineSchedContext *C) :
28 GenericScheduler(C) { }
29
30static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
31 const MachineFunction &MF) {
32
33 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
34 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
35 unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
36 ST.getOccupancyWithNumVGPRs(VGPRs));
37 return std::min(MinRegOccupancy,
Stanislav Mekhanoshin2b913b12017-02-01 22:59:50 +000038 ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
39 *MF.getFunction()));
Tom Stellard0d23ebe2016-08-29 19:42:52 +000040}
41
Stanislav Mekhanoshin582a5232017-02-15 17:19:50 +000042void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
43 GenericScheduler::initialize(DAG);
44
45 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
46
47 // FIXME: This is also necessary, because some passes that run after
48 // scheduling and before regalloc increase register pressure.
49 const int ErrorMargin = 3;
50
51 SGPRExcessLimit = Context->RegClassInfo
52 ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
53 VGPRExcessLimit = Context->RegClassInfo
54 ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
55 SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
56 SRI->getSGPRPressureSet()) - ErrorMargin;
57 VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
58 SRI->getVGPRPressureSet()) - ErrorMargin;
59}
60
// Initialize candidate \p Cand for scheduling \p SU at the \p AtTop boundary,
// filling in the register-pressure deltas (RPDelta.Excess and
// RPDelta.CriticalMax) that GenericScheduler::tryCandidate() compares.
// \p SGPRPressure and \p VGPRPressure are the current pressure values at the
// zone boundary, passed in by the caller so they are not recomputed for every
// candidate in the queue.
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop, const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {

  Cand.SU = SU;
  Cand.AtTop = AtTop;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  std::vector<unsigned> Pressure;
  std::vector<unsigned> MaxPressure;

  // Compute the pressure that would result from scheduling SU at this
  // boundary.
  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
  unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;


  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase
  // register pressure. Instructions that decrease or keep reg pressure
  // the same will be marked as RegExcess in tryCandidate() when they
  // are compared with instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
  // has the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    // Report the larger of the two overages as the critical pressure change.
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
138
139// This function is mostly cut and pasted from
140// GenericScheduler::pickNodeFromQueue()
141void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
142 const CandPolicy &ZonePolicy,
143 const RegPressureTracker &RPTracker,
144 SchedCandidate &Cand) {
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000145 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
146 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
147 unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
148 unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000149 ReadyQueue &Q = Zone.Available;
150 for (SUnit *SU : Q) {
151
152 SchedCandidate TryCand(ZonePolicy);
153 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
Stanislav Mekhanoshin582a5232017-02-15 17:19:50 +0000154 SGPRPressure, VGPRPressure);
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000155 // Pass SchedBoundary only when comparing nodes from the same boundary.
156 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
157 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
158 if (TryCand.Reason != NoCand) {
159 // Initialize resource delta if needed in case future heuristics query it.
160 if (TryCand.ResDelta == SchedResourceDelta())
161 TryCand.initResourceDelta(Zone.DAG, SchedModel);
162 Cand.setBest(TryCand);
163 }
164 }
165}
166
167static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
168 switch (Reason) {
169 default:
170 return Reason;
171 case GenericSchedulerBase::RegCritical:
172 case GenericSchedulerBase::RegExcess:
173 return -Reason;
174 }
175}
176
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
//
// Pick the best node from either zone, setting \p IsTopNode to record which
// zone it came from. Differs from the generic version in how a tie between
// the top and bottom candidates is broken (pressure-aware, see below).
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(BotCand));
  }

  // Check if the top Q has a better candidate.
  DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    DEBUG(traceCandidate(TopCand));
  }

  // Pick best from BotCand and TopCand.
  DEBUG(
    dbgs() << "Top Cand: ";
    traceCandidate(TopCand);
    dbgs() << "Bot Cand: ";
    traceCandidate(BotCand);
  );
  SchedCandidate Cand;
  if (TopCand.Reason == BotCand.Reason) {
    // Same reason on both sides: let the generic heuristics arbitrate.
    // TopCand.Reason is temporarily cleared so tryCandidate() treats TopCand
    // as the challenger; it is restored if TopCand does not win.
    Cand = BotCand;
    GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
    TopCand.Reason = NoCand;
    GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
    if (TopCand.Reason != NoCand) {
      Cand.setBest(TopCand);
    } else {
      TopCand.Reason = TopReason;
    }
  } else {
    // Different reasons: prefer, in order, the candidate that does not
    // increase excess pressure, then the one that does not increase critical
    // pressure, and otherwise fall back to the reason ranking.
    if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
      Cand = BotCand;
    } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = TopCand;
    } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
      Cand = BotCand;
    } else {
      int TopRank = getBidirectionalReasonRank(TopCand.Reason);
      int BotRank = getBidirectionalReasonRank(BotCand.Reason);
      if (TopRank > BotRank) {
        Cand = TopCand;
      } else {
        Cand = BotCand;
      }
    }
  }
  DEBUG(
    dbgs() << "Picking: ";
    traceCandidate(Cand);
  );

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
266
// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
//
// Pick the next node to schedule, honoring the region's scheduling-direction
// policy, and remove it from the ready queues.
SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
  // Region fully scheduled: nothing should remain in either zone's queues.
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  // Loop until an unscheduled node is found; a cached candidate may already
  // have been scheduled from the opposite zone.
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  // The chosen node may be ready at both boundaries; drop it from both queues.
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}
Stanislav Mekhanoshin582a5232017-02-15 17:19:50 +0000310
// Schedule the current region, then compare the resulting register pressure
// and wave occupancy against the pre-scheduling state. If the schedule lowers
// occupancy, revert the region to its original instruction order.
void GCNScheduleDAGMILive::schedule() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);

  // Record the original instruction order so the schedule can be undone.
  std::vector<MachineInstr*> Unsched;
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : *this)
    Unsched.push_back(&I);

  ScheduleDAGMILive::schedule();

  // Check the results of scheduling.
  GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
  // Pressure used for the pre-scheduling occupancy estimate below.
  std::vector<unsigned> UnschedPressure = getRegPressure().MaxSetPressure;
  // Peak SGPR/VGPR pressure reached by the new schedule (max over both
  // tracker directions).
  unsigned MaxSGPRs = std::max(
    getTopRPTracker().getPressure().MaxSetPressure[SRI->getSGPRPressureSet()],
    getBotRPTracker().getPressure().MaxSetPressure[SRI->getSGPRPressureSet()]);
  unsigned MaxVGPRs = std::max(
    getTopRPTracker().getPressure().MaxSetPressure[SRI->getVGPRPressureSet()],
    getBotRPTracker().getPressure().MaxSetPressure[SRI->getVGPRPressureSet()]);
  DEBUG(dbgs() << "Pressure after scheduling:\nSGPR = " << MaxSGPRs
               << "\nVGPR = " << MaxVGPRs << '\n');
  // Within the critical limits, occupancy cannot have regressed: keep the
  // schedule.
  if (MaxSGPRs <= S.SGPRCriticalLimit &&
      MaxVGPRs <= S.VGPRCriticalLimit) {
    DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }
  unsigned WavesAfter = getMaxWaves(MaxSGPRs, MaxVGPRs, MF);
  unsigned WavesUnsched = getMaxWaves(UnschedPressure[SRI->getSGPRPressureSet()],
                                      UnschedPressure[SRI->getVGPRPressureSet()], MF);
  DEBUG(dbgs() << "Occupancy before scheduling: " << WavesUnsched <<
        ", after " << WavesAfter << ".\n");
  if (WavesAfter >= WavesUnsched)
    return;

  // Occupancy regressed: restore the recorded pre-scheduling order,
  // re-inserting each instruction and repairing liveness as we go.
  DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  RegionEnd = RegionBegin;
  for (MachineInstr *MI : Unsched) {
    // Move MI back into position only if scheduling actually displaced it.
    if (MI->getIterator() != RegionEnd) {
      BB->remove(MI);
      BB->insert(RegionEnd, MI);
      // NOTE(review): LIS is null-checked here but dereferenced
      // unconditionally below (getInstructionIndex / detectDeadDefs) —
      // presumably LIS is always non-null for this scheduler; confirm.
      if (LIS)
        LIS->handleMove(*MI, true);
    }
    // Reset read-undef flags and update them later.
    for (auto &Op : MI->operands())
      if (Op.isReg() && Op.isDef())
        Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
    if (ShouldTrackLaneMasks) {
      // Adjust liveness and add missing dead+read-undef flags.
      SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
      RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
    } else {
      // Adjust for missing dead-def flags.
      RegOpers.detectDeadDefs(*MI, *LIS);
    }
    RegionEnd = MI->getIterator();
    ++RegionEnd;
    DEBUG(dbgs() << "Scheduling " << *MI);
  }
  RegionBegin = Unsched.front()->getIterator();

  placeDebugValues();
}
375}