blob: 5e7d7ed37b041c8a9cff08dbb3f842e39ede2629 [file] [log] [blame]
Nicolai Haehnle02c32912016-01-13 16:10:10 +00001//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18#include "SIInstrInfo.h"
19#include "llvm/CodeGen/MachineScheduler.h"
20#include "llvm/CodeGen/RegisterPressure.h"
21
22using namespace llvm;
23
24namespace llvm {
25
26enum SIScheduleCandReason {
27 NoCand,
28 RegUsage,
29 Latency,
30 Successor,
31 Depth,
32 NodeOrder
33};
34
35struct SISchedulerCandidate {
36 // The reason for this candidate.
37 SIScheduleCandReason Reason;
38
39 // Set of reasons that apply to multiple candidates.
40 uint32_t RepeatReasonSet;
41
42 SISchedulerCandidate()
43 : Reason(NoCand), RepeatReasonSet(0) {}
44
45 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
46 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
47};
48
49class SIScheduleDAGMI;
50class SIScheduleBlockCreator;
51
52class SIScheduleBlock {
53 SIScheduleDAGMI *DAG;
54 SIScheduleBlockCreator *BC;
55
56 std::vector<SUnit*> SUnits;
57 std::map<unsigned, unsigned> NodeNum2Index;
58 std::vector<SUnit*> TopReadySUs;
59 std::vector<SUnit*> ScheduledSUnits;
60
61 /// The top of the unscheduled zone.
62 IntervalPressure TopPressure;
63 RegPressureTracker TopRPTracker;
64
65 // Pressure: number of said class of registers needed to
66 // store the live virtual and real registers.
67 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
68 // Pressure of additional registers required inside the block.
69 std::vector<unsigned> InternalAdditionnalPressure;
70 // Pressure of input and output registers
71 std::vector<unsigned> LiveInPressure;
72 std::vector<unsigned> LiveOutPressure;
73 // Registers required by the block, and outputs.
74 // We do track only virtual registers.
75 // Note that some registers are not 32 bits,
76 // and thus the pressure is not equal
77 // to the number of live registers.
78 std::set<unsigned> LiveInRegs;
79 std::set<unsigned> LiveOutRegs;
80
81 bool Scheduled;
82 bool HighLatencyBlock;
83
84 std::vector<unsigned> HasLowLatencyNonWaitedParent;
85
86 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
87 unsigned ID;
88
89 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
90 std::vector<SIScheduleBlock*> Succs; // All blocks successors.
91 unsigned NumHighLatencySuccessors;
92
93public:
94 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
95 unsigned ID):
96 DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
97 TopRPTracker(TopPressure), Scheduled(false),
98 HighLatencyBlock(false), ID(ID),
99 Preds(), Succs(), NumHighLatencySuccessors(0) {};
100
101 ~SIScheduleBlock() {};
102
103 unsigned getID() const { return ID; }
104
105 /// Functions for Block construction.
106 void addUnit(SUnit *SU);
107
108 // When all SUs have been added.
109 void finalizeUnits();
110
111 // Add block pred, which has instruction predecessor of SU.
112 void addPred(SIScheduleBlock *Pred);
113 void addSucc(SIScheduleBlock *Succ);
114
115 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
116 const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
117
118 unsigned Height; // Maximum topdown path length to block without outputs
119 unsigned Depth; // Maximum bottomup path length to block without inputs
120
121 unsigned getNumHighLatencySuccessors() const {
122 return NumHighLatencySuccessors;
123 }
124
125 bool isHighLatencyBlock() { return HighLatencyBlock; }
126
127 // This is approximative.
128 // Ideally should take into accounts some instructions (rcp, etc)
129 // are 4 times slower.
130 int getCost() { return SUnits.size(); }
131
132 // The block Predecessors and Successors must be all registered
133 // before fastSchedule().
134 // Fast schedule with no particular requirement.
135 void fastSchedule();
136
137 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
138
139 // Complete schedule that will try to minimize reg pressure and
140 // low latencies, and will fill liveins and liveouts.
141 // Needs all MIs to be grouped between BeginBlock and EndBlock.
142 // The MIs can be moved after the scheduling,
143 // it is just used to allow correct track of live registers.
144 void schedule(MachineBasicBlock::iterator BeginBlock,
145 MachineBasicBlock::iterator EndBlock);
146
147 bool isScheduled() { return Scheduled; }
148
149
150 // Needs the block to be scheduled inside
151 // TODO: find a way to compute it.
152 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
153 return InternalAdditionnalPressure;
154 }
155
156 std::set<unsigned> &getInRegs() { return LiveInRegs; }
157 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
158
159 void printDebug(bool Full);
160
161private:
162 struct SISchedCandidate : SISchedulerCandidate {
163 // The best SUnit candidate.
164 SUnit *SU;
165
166 unsigned SGPRUsage;
167 unsigned VGPRUsage;
168 bool IsLowLatency;
169 unsigned LowLatencyOffset;
170 bool HasLowLatencyNonWaitedParent;
171
172 SISchedCandidate()
173 : SU(nullptr) {}
174
175 bool isValid() const { return SU; }
176
177 // Copy the status of another candidate without changing policy.
178 void setBest(SISchedCandidate &Best) {
179 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
180 SU = Best.SU;
181 Reason = Best.Reason;
182 SGPRUsage = Best.SGPRUsage;
183 VGPRUsage = Best.VGPRUsage;
184 IsLowLatency = Best.IsLowLatency;
185 LowLatencyOffset = Best.LowLatencyOffset;
186 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
187 }
188 };
189
190 void undoSchedule();
191
192 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
193 void releaseSucc(SUnit *SU, SDep *SuccEdge);
194 // InOrOutBlock: restrict to links pointing inside the block (true),
195 // or restrict to links pointing outside the block (false).
196 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
197
198 void nodeScheduled(SUnit *SU);
199 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
200 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
201 SUnit* pickNode();
202 void traceCandidate(const SISchedCandidate &Cand);
203 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
204 MachineBasicBlock::iterator EndBlock);
205};
206
207struct SIScheduleBlocks {
208 std::vector<SIScheduleBlock*> Blocks;
209 std::vector<int> TopDownIndex2Block;
210 std::vector<int> TopDownBlock2Index;
211};
212
213enum SISchedulerBlockCreatorVariant {
214 LatenciesAlone,
215 LatenciesGrouped,
216 LatenciesAlonePlusConsecutive
217};
218
219class SIScheduleBlockCreator {
220 SIScheduleDAGMI *DAG;
221 // unique_ptr handles freeing memory for us.
222 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
223 std::map<SISchedulerBlockCreatorVariant,
224 SIScheduleBlocks> Blocks;
225 std::vector<SIScheduleBlock*> CurrentBlocks;
226 std::vector<int> Node2CurrentBlock;
227
228 // Topological sort
229 // Maps topological index to the node number.
230 std::vector<int> TopDownIndex2Block;
231 std::vector<int> TopDownBlock2Index;
232 std::vector<int> BottomUpIndex2Block;
233
234 // 0 -> Color not given.
235 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
236 // Above -> Other groups.
237 int NextReservedID;
238 int NextNonReservedID;
239 std::vector<int> CurrentColoring;
240 std::vector<int> CurrentTopDownReservedDependencyColoring;
241 std::vector<int> CurrentBottomUpReservedDependencyColoring;
242
243public:
244 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
245 ~SIScheduleBlockCreator();
246
247 SIScheduleBlocks
248 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
249
250 bool isSUInBlock(SUnit *SU, unsigned ID);
251
252private:
253 // Give a Reserved color to every high latency.
254 void colorHighLatenciesAlone();
255
256 // Create groups of high latencies with a Reserved color.
257 void colorHighLatenciesGroups();
258
259 // Compute coloring for topdown and bottom traversals with
260 // different colors depending on dependencies on Reserved colors.
261 void colorComputeReservedDependencies();
262
263 // Give color to all non-colored SUs according to Reserved groups dependencies.
264 void colorAccordingToReservedDependencies();
265
266 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
267 // The new colors are computed according to the dependencies on the other blocks
268 // formed with colorAccordingToReservedDependencies.
269 void colorEndsAccordingToDependencies();
270
271 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
272 void colorForceConsecutiveOrderInGroup();
273
274 // Merge Constant loads that have all their users into another group to the group.
275 // (TODO: else if all their users depend on the same group, put them there)
276 void colorMergeConstantLoadsNextGroup();
277
278 // Merge SUs that have all their users into another group to the group
279 void colorMergeIfPossibleNextGroup();
280
281 // Merge SUs that have all their users into another group to the group,
282 // but only for Reserved groups.
283 void colorMergeIfPossibleNextGroupOnlyForReserved();
284
285 // Merge SUs that have all their users into another group to the group,
286 // but only if the group is no more than a few SUs.
287 void colorMergeIfPossibleSmallGroupsToNextGroup();
288
289 // Divides Blocks with important size.
290 // Idea of implementation: attribute new colors depending on topdown and
291 // bottom up links to other blocks.
292 void cutHugeBlocks();
293
294 // Put in one group all instructions with no users in this scheduling region
295 // (we'd want these groups be at the end).
296 void regroupNoUserInstructions();
297
298 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
299
300 void topologicalSort();
301
302 void scheduleInsideBlocks();
303
304 void fillStats();
305};
306
307enum SISchedulerBlockSchedulerVariant {
308 BlockLatencyRegUsage,
309 BlockRegUsageLatency,
310 BlockRegUsage
311};
312
313class SIScheduleBlockScheduler {
314 SIScheduleDAGMI *DAG;
315 SISchedulerBlockSchedulerVariant Variant;
316 std::vector<SIScheduleBlock*> Blocks;
317
318 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
319 std::set<unsigned> LiveRegs;
320 // Num of schedulable unscheduled blocks reading the register.
321 std::map<unsigned, unsigned> LiveRegsConsumers;
322
323 std::vector<unsigned> LastPosHighLatencyParentScheduled;
324 int LastPosWaitedHighLatency;
325
326 std::vector<SIScheduleBlock*> BlocksScheduled;
327 unsigned NumBlockScheduled;
328 std::vector<SIScheduleBlock*> ReadyBlocks;
329
330 unsigned VregCurrentUsage;
331 unsigned SregCurrentUsage;
332
333 // Currently is only approximation.
334 unsigned maxVregUsage;
335 unsigned maxSregUsage;
336
337 std::vector<unsigned> BlockNumPredsLeft;
338 std::vector<unsigned> BlockNumSuccsLeft;
339
340public:
341 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
342 SISchedulerBlockSchedulerVariant Variant,
343 SIScheduleBlocks BlocksStruct);
344 ~SIScheduleBlockScheduler() {};
345
346 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
347
348 unsigned getVGPRUsage() { return maxVregUsage; };
349 unsigned getSGPRUsage() { return maxSregUsage; };
350
351private:
352 struct SIBlockSchedCandidate : SISchedulerCandidate {
353 // The best Block candidate.
354 SIScheduleBlock *Block;
355
356 bool IsHighLatency;
357 int VGPRUsageDiff;
358 unsigned NumSuccessors;
359 unsigned NumHighLatencySuccessors;
360 unsigned LastPosHighLatParentScheduled;
361 unsigned Height;
362
363 SIBlockSchedCandidate()
364 : Block(nullptr) {}
365
366 bool isValid() const { return Block; }
367
368 // Copy the status of another candidate without changing policy.
369 void setBest(SIBlockSchedCandidate &Best) {
370 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
371 Block = Best.Block;
372 Reason = Best.Reason;
373 IsHighLatency = Best.IsHighLatency;
374 VGPRUsageDiff = Best.VGPRUsageDiff;
375 NumSuccessors = Best.NumSuccessors;
376 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
377 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
378 Height = Best.Height;
379 }
380 };
381
382 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
383 SIBlockSchedCandidate &TryCand);
384 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
385 SIBlockSchedCandidate &TryCand);
386 SIScheduleBlock *pickBlock();
387
388 void addLiveRegs(std::set<unsigned> &Regs);
389 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
390 void releaseBlockSuccs(SIScheduleBlock *Parent);
391 void blockScheduled(SIScheduleBlock *Block);
392
393 // Check register pressure change
394 // by scheduling a block with these LiveIn and LiveOut.
395 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
396 std::set<unsigned> &OutRegs);
397
398 void schedule();
399};
400
401struct SIScheduleBlockResult {
402 std::vector<unsigned> SUs;
403 unsigned MaxSGPRUsage;
404 unsigned MaxVGPRUsage;
405};
406
407class SIScheduler {
408 SIScheduleDAGMI *DAG;
409 SIScheduleBlockCreator BlockCreator;
410
411public:
412 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {};
413
414 ~SIScheduler() {};
415
416 struct SIScheduleBlockResult
417 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
418 SISchedulerBlockSchedulerVariant ScheduleVariant);
419};
420
421class SIScheduleDAGMI : public ScheduleDAGMILive {
422 const SIInstrInfo *SITII;
423 const SIRegisterInfo *SITRI;
424
425 std::vector<SUnit> SUnitsLinksBackup;
426
427 // For moveLowLatencies. After all Scheduling variants are tested.
428 std::vector<unsigned> ScheduledSUnits;
429 std::vector<unsigned> ScheduledSUnitsInv;
430
431 unsigned VGPRSetID;
432 unsigned SGPRSetID;
433
434public:
435 SIScheduleDAGMI(MachineSchedContext *C);
436
437 ~SIScheduleDAGMI() override;
438
439 // Entry point for the schedule.
440 void schedule() override;
441
442 // To init Block's RPTracker.
443 void initRPTracker(RegPressureTracker &RPTracker) {
Matthias Braun5d458612016-01-20 00:23:26 +0000444 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000445 }
446
447 MachineBasicBlock *getBB() { return BB; }
448 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; };
449 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; };
450 LiveIntervals *getLIS() { return LIS; }
451 MachineRegisterInfo *getMRI() { return &MRI; }
452 const TargetRegisterInfo *getTRI() { return TRI; }
453 SUnit& getEntrySU() { return EntrySU; };
454 SUnit& getExitSU() { return ExitSU; };
455
456 void restoreSULinksLeft();
457
458 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
459 _Iterator End,
460 unsigned &VgprUsage,
461 unsigned &SgprUsage);
462 std::set<unsigned> getInRegs() {
Matthias Braun5d458612016-01-20 00:23:26 +0000463 std::set<unsigned> InRegs;
464 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
465 InRegs.insert(RegMaskPair.RegUnit);
466 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000467 return InRegs;
468 };
469
470 unsigned getVGPRSetID() const { return VGPRSetID; }
471 unsigned getSGPRSetID() const { return SGPRSetID; }
472
473private:
474 void topologicalSort();
475 // After scheduling is done, improve low latency placements.
476 void moveLowLatencies();
477
478public:
479 // Some stats for scheduling inside blocks.
480 std::vector<unsigned> IsLowLatencySU;
481 std::vector<unsigned> LowLatencyOffset;
482 std::vector<unsigned> IsHighLatencySU;
483 // Topological sort
484 // Maps topological index to the node number.
485 std::vector<int> TopDownIndex2SU;
486 std::vector<int> BottomUpIndex2SU;
487};
488
489} // namespace llvm
490
491#endif /* SIMACHINESCHEDULER_H_ */