blob: 77c07350d325becb3a5c635449490e926a7d8eed [file] [log] [blame]
Eugene Zelenko2bc2f332016-12-09 22:06:55 +00001//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
Nicolai Haehnle02c32912016-01-13 16:10:10 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18#include "SIInstrInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000019#include "llvm/CodeGen/MachineBasicBlock.h"
Nicolai Haehnle02c32912016-01-13 16:10:10 +000020#include "llvm/CodeGen/MachineScheduler.h"
21#include "llvm/CodeGen/RegisterPressure.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000022#include "llvm/CodeGen/ScheduleDAG.h"
23#include <cassert>
24#include <cstdint>
25#include <map>
26#include <memory>
27#include <set>
28#include <vector>
Nicolai Haehnle02c32912016-01-13 16:10:10 +000029
30namespace llvm {
31
32enum SIScheduleCandReason {
33 NoCand,
34 RegUsage,
35 Latency,
36 Successor,
37 Depth,
38 NodeOrder
39};
40
41struct SISchedulerCandidate {
42 // The reason for this candidate.
43 SIScheduleCandReason Reason;
44
45 // Set of reasons that apply to multiple candidates.
46 uint32_t RepeatReasonSet;
47
48 SISchedulerCandidate()
49 : Reason(NoCand), RepeatReasonSet(0) {}
50
51 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
52 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
53};
54
55class SIScheduleDAGMI;
56class SIScheduleBlockCreator;
57
58class SIScheduleBlock {
59 SIScheduleDAGMI *DAG;
60 SIScheduleBlockCreator *BC;
61
62 std::vector<SUnit*> SUnits;
63 std::map<unsigned, unsigned> NodeNum2Index;
64 std::vector<SUnit*> TopReadySUs;
65 std::vector<SUnit*> ScheduledSUnits;
66
67 /// The top of the unscheduled zone.
68 IntervalPressure TopPressure;
69 RegPressureTracker TopRPTracker;
70
71 // Pressure: number of said class of registers needed to
72 // store the live virtual and real registers.
73 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
74 // Pressure of additional registers required inside the block.
75 std::vector<unsigned> InternalAdditionnalPressure;
76 // Pressure of input and output registers
77 std::vector<unsigned> LiveInPressure;
78 std::vector<unsigned> LiveOutPressure;
79 // Registers required by the block, and outputs.
80 // We do track only virtual registers.
81 // Note that some registers are not 32 bits,
82 // and thus the pressure is not equal
83 // to the number of live registers.
84 std::set<unsigned> LiveInRegs;
85 std::set<unsigned> LiveOutRegs;
86
87 bool Scheduled;
88 bool HighLatencyBlock;
89
90 std::vector<unsigned> HasLowLatencyNonWaitedParent;
91
92 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
93 unsigned ID;
94
95 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
96 std::vector<SIScheduleBlock*> Succs; // All blocks successors.
97 unsigned NumHighLatencySuccessors;
98
99public:
100 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
101 unsigned ID):
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000102 DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
103 HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000104
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000105 ~SIScheduleBlock() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000106
107 unsigned getID() const { return ID; }
108
109 /// Functions for Block construction.
110 void addUnit(SUnit *SU);
111
112 // When all SUs have been added.
113 void finalizeUnits();
114
115 // Add block pred, which has instruction predecessor of SU.
116 void addPred(SIScheduleBlock *Pred);
117 void addSucc(SIScheduleBlock *Succ);
118
119 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
120 const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
121
122 unsigned Height; // Maximum topdown path length to block without outputs
123 unsigned Depth; // Maximum bottomup path length to block without inputs
124
125 unsigned getNumHighLatencySuccessors() const {
126 return NumHighLatencySuccessors;
127 }
128
129 bool isHighLatencyBlock() { return HighLatencyBlock; }
130
131 // This is approximative.
132 // Ideally should take into accounts some instructions (rcp, etc)
133 // are 4 times slower.
134 int getCost() { return SUnits.size(); }
135
136 // The block Predecessors and Successors must be all registered
137 // before fastSchedule().
138 // Fast schedule with no particular requirement.
139 void fastSchedule();
140
141 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
142
143 // Complete schedule that will try to minimize reg pressure and
144 // low latencies, and will fill liveins and liveouts.
145 // Needs all MIs to be grouped between BeginBlock and EndBlock.
146 // The MIs can be moved after the scheduling,
147 // it is just used to allow correct track of live registers.
148 void schedule(MachineBasicBlock::iterator BeginBlock,
149 MachineBasicBlock::iterator EndBlock);
150
151 bool isScheduled() { return Scheduled; }
152
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000153 // Needs the block to be scheduled inside
154 // TODO: find a way to compute it.
155 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
156 return InternalAdditionnalPressure;
157 }
158
159 std::set<unsigned> &getInRegs() { return LiveInRegs; }
160 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
161
162 void printDebug(bool Full);
163
164private:
165 struct SISchedCandidate : SISchedulerCandidate {
166 // The best SUnit candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000167 SUnit *SU = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000168
169 unsigned SGPRUsage;
170 unsigned VGPRUsage;
171 bool IsLowLatency;
172 unsigned LowLatencyOffset;
173 bool HasLowLatencyNonWaitedParent;
174
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000175 SISchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000176
177 bool isValid() const { return SU; }
178
179 // Copy the status of another candidate without changing policy.
180 void setBest(SISchedCandidate &Best) {
181 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
182 SU = Best.SU;
183 Reason = Best.Reason;
184 SGPRUsage = Best.SGPRUsage;
185 VGPRUsage = Best.VGPRUsage;
186 IsLowLatency = Best.IsLowLatency;
187 LowLatencyOffset = Best.LowLatencyOffset;
188 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
189 }
190 };
191
192 void undoSchedule();
193
194 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
195 void releaseSucc(SUnit *SU, SDep *SuccEdge);
196 // InOrOutBlock: restrict to links pointing inside the block (true),
197 // or restrict to links pointing outside the block (false).
198 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
199
200 void nodeScheduled(SUnit *SU);
201 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
202 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
203 SUnit* pickNode();
204 void traceCandidate(const SISchedCandidate &Cand);
205 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
206 MachineBasicBlock::iterator EndBlock);
207};
208
209struct SIScheduleBlocks {
210 std::vector<SIScheduleBlock*> Blocks;
211 std::vector<int> TopDownIndex2Block;
212 std::vector<int> TopDownBlock2Index;
213};
214
215enum SISchedulerBlockCreatorVariant {
216 LatenciesAlone,
217 LatenciesGrouped,
218 LatenciesAlonePlusConsecutive
219};
220
221class SIScheduleBlockCreator {
222 SIScheduleDAGMI *DAG;
223 // unique_ptr handles freeing memory for us.
224 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
225 std::map<SISchedulerBlockCreatorVariant,
226 SIScheduleBlocks> Blocks;
227 std::vector<SIScheduleBlock*> CurrentBlocks;
228 std::vector<int> Node2CurrentBlock;
229
230 // Topological sort
231 // Maps topological index to the node number.
232 std::vector<int> TopDownIndex2Block;
233 std::vector<int> TopDownBlock2Index;
234 std::vector<int> BottomUpIndex2Block;
235
236 // 0 -> Color not given.
237 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
238 // Above -> Other groups.
239 int NextReservedID;
240 int NextNonReservedID;
241 std::vector<int> CurrentColoring;
242 std::vector<int> CurrentTopDownReservedDependencyColoring;
243 std::vector<int> CurrentBottomUpReservedDependencyColoring;
244
245public:
246 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
247 ~SIScheduleBlockCreator();
248
249 SIScheduleBlocks
250 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
251
252 bool isSUInBlock(SUnit *SU, unsigned ID);
253
254private:
255 // Give a Reserved color to every high latency.
256 void colorHighLatenciesAlone();
257
258 // Create groups of high latencies with a Reserved color.
259 void colorHighLatenciesGroups();
260
261 // Compute coloring for topdown and bottom traversals with
262 // different colors depending on dependencies on Reserved colors.
263 void colorComputeReservedDependencies();
264
265 // Give color to all non-colored SUs according to Reserved groups dependencies.
266 void colorAccordingToReservedDependencies();
267
268 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
269 // The new colors are computed according to the dependencies on the other blocks
270 // formed with colorAccordingToReservedDependencies.
271 void colorEndsAccordingToDependencies();
272
273 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
274 void colorForceConsecutiveOrderInGroup();
275
276 // Merge Constant loads that have all their users into another group to the group.
277 // (TODO: else if all their users depend on the same group, put them there)
278 void colorMergeConstantLoadsNextGroup();
279
280 // Merge SUs that have all their users into another group to the group
281 void colorMergeIfPossibleNextGroup();
282
283 // Merge SUs that have all their users into another group to the group,
284 // but only for Reserved groups.
285 void colorMergeIfPossibleNextGroupOnlyForReserved();
286
287 // Merge SUs that have all their users into another group to the group,
288 // but only if the group is no more than a few SUs.
289 void colorMergeIfPossibleSmallGroupsToNextGroup();
290
291 // Divides Blocks with important size.
292 // Idea of implementation: attribute new colors depending on topdown and
293 // bottom up links to other blocks.
294 void cutHugeBlocks();
295
296 // Put in one group all instructions with no users in this scheduling region
297 // (we'd want these groups be at the end).
298 void regroupNoUserInstructions();
299
300 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
301
302 void topologicalSort();
303
304 void scheduleInsideBlocks();
305
306 void fillStats();
307};
308
309enum SISchedulerBlockSchedulerVariant {
310 BlockLatencyRegUsage,
311 BlockRegUsageLatency,
312 BlockRegUsage
313};
314
315class SIScheduleBlockScheduler {
316 SIScheduleDAGMI *DAG;
317 SISchedulerBlockSchedulerVariant Variant;
318 std::vector<SIScheduleBlock*> Blocks;
319
320 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
321 std::set<unsigned> LiveRegs;
322 // Num of schedulable unscheduled blocks reading the register.
323 std::map<unsigned, unsigned> LiveRegsConsumers;
324
325 std::vector<unsigned> LastPosHighLatencyParentScheduled;
326 int LastPosWaitedHighLatency;
327
328 std::vector<SIScheduleBlock*> BlocksScheduled;
329 unsigned NumBlockScheduled;
330 std::vector<SIScheduleBlock*> ReadyBlocks;
331
332 unsigned VregCurrentUsage;
333 unsigned SregCurrentUsage;
334
335 // Currently is only approximation.
336 unsigned maxVregUsage;
337 unsigned maxSregUsage;
338
339 std::vector<unsigned> BlockNumPredsLeft;
340 std::vector<unsigned> BlockNumSuccsLeft;
341
342public:
343 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
344 SISchedulerBlockSchedulerVariant Variant,
345 SIScheduleBlocks BlocksStruct);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000346 ~SIScheduleBlockScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000347
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000348 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000349
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000350 unsigned getVGPRUsage() { return maxVregUsage; }
351 unsigned getSGPRUsage() { return maxSregUsage; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000352
353private:
354 struct SIBlockSchedCandidate : SISchedulerCandidate {
355 // The best Block candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000356 SIScheduleBlock *Block = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000357
358 bool IsHighLatency;
359 int VGPRUsageDiff;
360 unsigned NumSuccessors;
361 unsigned NumHighLatencySuccessors;
362 unsigned LastPosHighLatParentScheduled;
363 unsigned Height;
364
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000365 SIBlockSchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000366
367 bool isValid() const { return Block; }
368
369 // Copy the status of another candidate without changing policy.
370 void setBest(SIBlockSchedCandidate &Best) {
371 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
372 Block = Best.Block;
373 Reason = Best.Reason;
374 IsHighLatency = Best.IsHighLatency;
375 VGPRUsageDiff = Best.VGPRUsageDiff;
376 NumSuccessors = Best.NumSuccessors;
377 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
378 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
379 Height = Best.Height;
380 }
381 };
382
383 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
384 SIBlockSchedCandidate &TryCand);
385 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
386 SIBlockSchedCandidate &TryCand);
387 SIScheduleBlock *pickBlock();
388
389 void addLiveRegs(std::set<unsigned> &Regs);
390 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
391 void releaseBlockSuccs(SIScheduleBlock *Parent);
392 void blockScheduled(SIScheduleBlock *Block);
393
394 // Check register pressure change
395 // by scheduling a block with these LiveIn and LiveOut.
396 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
397 std::set<unsigned> &OutRegs);
398
399 void schedule();
400};
401
402struct SIScheduleBlockResult {
403 std::vector<unsigned> SUs;
404 unsigned MaxSGPRUsage;
405 unsigned MaxVGPRUsage;
406};
407
408class SIScheduler {
409 SIScheduleDAGMI *DAG;
410 SIScheduleBlockCreator BlockCreator;
411
412public:
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000413 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000414
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000415 ~SIScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000416
417 struct SIScheduleBlockResult
418 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
419 SISchedulerBlockSchedulerVariant ScheduleVariant);
420};
421
Matt Arsenault6b6a2c32016-03-11 08:00:27 +0000422class SIScheduleDAGMI final : public ScheduleDAGMILive {
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000423 const SIInstrInfo *SITII;
424 const SIRegisterInfo *SITRI;
425
426 std::vector<SUnit> SUnitsLinksBackup;
427
428 // For moveLowLatencies. After all Scheduling variants are tested.
429 std::vector<unsigned> ScheduledSUnits;
430 std::vector<unsigned> ScheduledSUnitsInv;
431
432 unsigned VGPRSetID;
433 unsigned SGPRSetID;
434
435public:
436 SIScheduleDAGMI(MachineSchedContext *C);
437
438 ~SIScheduleDAGMI() override;
439
440 // Entry point for the schedule.
441 void schedule() override;
442
443 // To init Block's RPTracker.
444 void initRPTracker(RegPressureTracker &RPTracker) {
Matthias Braun5d458612016-01-20 00:23:26 +0000445 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000446 }
447
448 MachineBasicBlock *getBB() { return BB; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000449 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
450 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000451 LiveIntervals *getLIS() { return LIS; }
452 MachineRegisterInfo *getMRI() { return &MRI; }
453 const TargetRegisterInfo *getTRI() { return TRI; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000454 SUnit& getEntrySU() { return EntrySU; }
455 SUnit& getExitSU() { return ExitSU; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000456
457 void restoreSULinksLeft();
458
459 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
460 _Iterator End,
461 unsigned &VgprUsage,
462 unsigned &SgprUsage);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000463
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000464 std::set<unsigned> getInRegs() {
Matthias Braun5d458612016-01-20 00:23:26 +0000465 std::set<unsigned> InRegs;
466 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
467 InRegs.insert(RegMaskPair.RegUnit);
468 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000469 return InRegs;
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000470 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000471
472 unsigned getVGPRSetID() const { return VGPRSetID; }
473 unsigned getSGPRSetID() const { return SGPRSetID; }
474
475private:
476 void topologicalSort();
477 // After scheduling is done, improve low latency placements.
478 void moveLowLatencies();
479
480public:
481 // Some stats for scheduling inside blocks.
482 std::vector<unsigned> IsLowLatencySU;
483 std::vector<unsigned> LowLatencyOffset;
484 std::vector<unsigned> IsHighLatencySU;
485 // Topological sort
486 // Maps topological index to the node number.
487 std::vector<int> TopDownIndex2SU;
488 std::vector<int> BottomUpIndex2SU;
489};
490
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000491} // end namespace llvm
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000492
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000493#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H