blob: e714eeffad008ee1210a84767a82549765c2c254 [file] [log] [blame]
Tom Stellard0d23ebe2016-08-29 19:42:52 +00001//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// This contains a MachineSchedStrategy implementation for maximizing wave
12/// occupancy on GCN hardware.
13//===----------------------------------------------------------------------===//
14
15#include "GCNSchedStrategy.h"
16#include "AMDGPUSubtarget.h"
17#include "SIInstrInfo.h"
18#include "SIMachineFunctionInfo.h"
19#include "SIRegisterInfo.h"
20#include "llvm/CodeGen/RegisterClassInfo.h"
21
22#define DEBUG_TYPE "misched"
23
24using namespace llvm;
25
26GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
27 const MachineSchedContext *C) :
28 GenericScheduler(C) { }
29
30static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
31 const MachineFunction &MF) {
32
33 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
34 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
35 unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
36 ST.getOccupancyWithNumVGPRs(VGPRs));
37 return std::min(MinRegOccupancy,
38 ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
39}
40
41void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
42 bool AtTop, const RegPressureTracker &RPTracker,
43 const SIRegisterInfo *SRI,
44 int SGPRPressure,
45 int VGPRPressure,
46 int SGPRExcessLimit,
47 int VGPRExcessLimit,
48 int SGPRCriticalLimit,
49 int VGPRCriticalLimit) {
50
51 Cand.SU = SU;
52 Cand.AtTop = AtTop;
53
54 // getDownwardPressure() and getUpwardPressure() make temporary changes to
55 // the the tracker, so we need to pass those function a non-const copy.
56 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
57
58 std::vector<unsigned> Pressure;
59 std::vector<unsigned> MaxPressure;
60
61 if (AtTop)
62 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
63 else {
64 // FIXME: I think for bottom up scheduling, the register pressure is cached
65 // and can be retrieved by DAG->getPressureDif(SU).
66 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
67 }
Matt Arsenaultf3dd8632016-11-01 00:55:14 +000068
Tom Stellard0d23ebe2016-08-29 19:42:52 +000069 int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
70 int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
71
72 // If two instructions increase the pressure of different register sets
73 // by the same amount, the generic scheduler will prefer to schedule the
74 // instruction that increases the set with the least amount of registers,
75 // which in our case would be SGPRs. This is rarely what we want, so
76 // when we report excess/critical register pressure, we do it either
77 // only for VGPRs or only for SGPRs.
78
79 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
80 const int MaxVGPRPressureInc = 16;
81 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
82 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
83
84
85 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
86 // to increase the likelihood we don't go over the limits. We should improve
87 // the analysis to look through dependencies to find the path with the least
88 // register pressure.
89 // FIXME: This is also necessary, because some passes that run after
90 // scheduling and before regalloc increase register pressure.
91 const int ErrorMargin = 3;
92 VGPRExcessLimit -= ErrorMargin;
93 SGPRExcessLimit -= ErrorMargin;
94
95 // We only need to update the RPDelata for instructions that increase
96 // register pressure. Instructions that decrease or keep reg pressure
97 // the same will be marked as RegExcess in tryCandidate() when they
98 // are compared with instructions that increase the register pressure.
99 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
100 Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
101 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
102 }
103
104 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
105 Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
Valery Pykhtin75d1de92017-01-26 10:51:47 +0000106 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000107 }
108
109 // Register pressure is considered 'CRITICAL' if it is approaching a value
110 // that would reduce the wave occupancy for the execution unit. When
111 // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
112 // has the same cost, so we don't need to prefer one over the other.
113
114 VGPRCriticalLimit -= ErrorMargin;
115 SGPRCriticalLimit -= ErrorMargin;
116
117 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
118 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
119
120 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
121 if (SGPRDelta > VGPRDelta) {
122 Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
123 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
124 } else {
125 Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
126 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
127 }
128 }
129}
130
131// This function is mostly cut and pasted from
132// GenericScheduler::pickNodeFromQueue()
133void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
134 const CandPolicy &ZonePolicy,
135 const RegPressureTracker &RPTracker,
136 SchedCandidate &Cand) {
137 const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
138 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
139 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
140 unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
141 unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
142 unsigned SGPRExcessLimit =
143 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
144 unsigned VGPRExcessLimit =
145 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
146 unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
Marek Olsak91f22fb2016-12-09 19:49:40 +0000147 unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
Konstantin Zhuravlyov1d650262016-09-06 20:22:28 +0000148 unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000149
150 ReadyQueue &Q = Zone.Available;
151 for (SUnit *SU : Q) {
152
153 SchedCandidate TryCand(ZonePolicy);
154 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
155 SGPRPressure, VGPRPressure,
156 SGPRExcessLimit, VGPRExcessLimit,
157 SGPRCriticalLimit, VGPRCriticalLimit);
158 // Pass SchedBoundary only when comparing nodes from the same boundary.
159 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
160 GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
161 if (TryCand.Reason != NoCand) {
162 // Initialize resource delta if needed in case future heuristics query it.
163 if (TryCand.ResDelta == SchedResourceDelta())
164 TryCand.initResourceDelta(Zone.DAG, SchedModel);
165 Cand.setBest(TryCand);
166 }
167 }
168}
169
170static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
171 switch (Reason) {
172 default:
173 return Reason;
174 case GenericSchedulerBase::RegCritical:
175 case GenericSchedulerBase::RegExcess:
176 return -Reason;
177 }
178}
179
180// This function is mostly cut and pasted from
181// GenericScheduler::pickNodeBidirectional()
182SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
183 // Schedule as far as possible in the direction of no choice. This is most
184 // efficient, but also provides the best heuristics for CriticalPSets.
185 if (SUnit *SU = Bot.pickOnlyChoice()) {
186 IsTopNode = false;
187 return SU;
188 }
189 if (SUnit *SU = Top.pickOnlyChoice()) {
190 IsTopNode = true;
191 return SU;
192 }
193 // Set the bottom-up policy based on the state of the current bottom zone and
194 // the instructions outside the zone, including the top zone.
195 CandPolicy BotPolicy;
196 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
197 // Set the top-down policy based on the state of the current top zone and
198 // the instructions outside the zone, including the bottom zone.
199 CandPolicy TopPolicy;
200 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
201
202 // See if BotCand is still valid (because we previously scheduled from Top).
203 DEBUG(dbgs() << "Picking from Bot:\n");
204 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
205 BotCand.Policy != BotPolicy) {
206 BotCand.reset(CandPolicy());
207 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
208 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
209 } else {
210 DEBUG(traceCandidate(BotCand));
211 }
212
213 // Check if the top Q has a better candidate.
214 DEBUG(dbgs() << "Picking from Top:\n");
215 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
216 TopCand.Policy != TopPolicy) {
217 TopCand.reset(CandPolicy());
218 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
219 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
220 } else {
221 DEBUG(traceCandidate(TopCand));
222 }
223
224 // Pick best from BotCand and TopCand.
225 DEBUG(
226 dbgs() << "Top Cand: ";
227 traceCandidate(BotCand);
228 dbgs() << "Bot Cand: ";
229 traceCandidate(TopCand);
230 );
231 SchedCandidate Cand;
232 if (TopCand.Reason == BotCand.Reason) {
233 Cand = BotCand;
234 GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
235 TopCand.Reason = NoCand;
236 GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
237 if (TopCand.Reason != NoCand) {
Matt Arsenaultf3dd8632016-11-01 00:55:14 +0000238 Cand.setBest(TopCand);
Tom Stellard0d23ebe2016-08-29 19:42:52 +0000239 } else {
240 TopCand.Reason = TopReason;
241 }
242 } else {
243 if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
244 Cand = TopCand;
245 } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
246 Cand = BotCand;
247 } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
248 Cand = TopCand;
249 } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
250 Cand = BotCand;
251 } else {
252 int TopRank = getBidirectionalReasonRank(TopCand.Reason);
253 int BotRank = getBidirectionalReasonRank(BotCand.Reason);
254 if (TopRank > BotRank) {
255 Cand = TopCand;
256 } else {
257 Cand = BotCand;
258 }
259 }
260 }
261 DEBUG(
262 dbgs() << "Picking: ";
263 traceCandidate(Cand);
264 );
265
266 IsTopNode = Cand.AtTop;
267 return Cand.SU;
268}
269
270// This function is mostly cut and pasted from
271// GenericScheduler::pickNode()
272SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
273 if (DAG->top() == DAG->bottom()) {
274 assert(Top.Available.empty() && Top.Pending.empty() &&
275 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
276 return nullptr;
277 }
278 SUnit *SU;
279 do {
280 if (RegionPolicy.OnlyTopDown) {
281 SU = Top.pickOnlyChoice();
282 if (!SU) {
283 CandPolicy NoPolicy;
284 TopCand.reset(NoPolicy);
285 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
286 assert(TopCand.Reason != NoCand && "failed to find a candidate");
287 SU = TopCand.SU;
288 }
289 IsTopNode = true;
290 } else if (RegionPolicy.OnlyBottomUp) {
291 SU = Bot.pickOnlyChoice();
292 if (!SU) {
293 CandPolicy NoPolicy;
294 BotCand.reset(NoPolicy);
295 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
296 assert(BotCand.Reason != NoCand && "failed to find a candidate");
297 SU = BotCand.SU;
298 }
299 IsTopNode = false;
300 } else {
301 SU = pickNodeBidirectional(IsTopNode);
302 }
303 } while (SU->isScheduled);
304
305 if (SU->isTopReady())
306 Top.removeReady(SU);
307 if (SU->isBottomReady())
308 Bot.removeReady(SU);
309
310 DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
311 return SU;
312}