blob: 2dc4b346de7921ab8760f1c8cfa8fd67d8774842 [file] [log] [blame]
Eugene Zelenko2bc2f332016-12-09 22:06:55 +00001//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
Nicolai Haehnle02c32912016-01-13 16:10:10 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18#include "SIInstrInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000019#include "llvm/CodeGen/MachineBasicBlock.h"
Nicolai Haehnle02c32912016-01-13 16:10:10 +000020#include "llvm/CodeGen/MachineScheduler.h"
21#include "llvm/CodeGen/RegisterPressure.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000022#include "llvm/CodeGen/ScheduleDAG.h"
23#include <cassert>
24#include <cstdint>
25#include <map>
26#include <memory>
27#include <set>
28#include <vector>
Nicolai Haehnle02c32912016-01-13 16:10:10 +000029
30namespace llvm {
31
32enum SIScheduleCandReason {
33 NoCand,
34 RegUsage,
35 Latency,
36 Successor,
37 Depth,
38 NodeOrder
39};
40
41struct SISchedulerCandidate {
42 // The reason for this candidate.
Eugene Zelenko66203762017-01-21 00:53:49 +000043 SIScheduleCandReason Reason = NoCand;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000044
45 // Set of reasons that apply to multiple candidates.
Eugene Zelenko66203762017-01-21 00:53:49 +000046 uint32_t RepeatReasonSet = 0;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000047
Eugene Zelenko66203762017-01-21 00:53:49 +000048 SISchedulerCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000049
50 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52};
53
54class SIScheduleDAGMI;
55class SIScheduleBlockCreator;
56
57class SIScheduleBlock {
58 SIScheduleDAGMI *DAG;
59 SIScheduleBlockCreator *BC;
60
61 std::vector<SUnit*> SUnits;
62 std::map<unsigned, unsigned> NodeNum2Index;
63 std::vector<SUnit*> TopReadySUs;
64 std::vector<SUnit*> ScheduledSUnits;
65
66 /// The top of the unscheduled zone.
67 IntervalPressure TopPressure;
68 RegPressureTracker TopRPTracker;
69
70 // Pressure: number of said class of registers needed to
71 // store the live virtual and real registers.
72 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
73 // Pressure of additional registers required inside the block.
74 std::vector<unsigned> InternalAdditionnalPressure;
75 // Pressure of input and output registers
76 std::vector<unsigned> LiveInPressure;
77 std::vector<unsigned> LiveOutPressure;
78 // Registers required by the block, and outputs.
79 // We do track only virtual registers.
80 // Note that some registers are not 32 bits,
81 // and thus the pressure is not equal
82 // to the number of live registers.
83 std::set<unsigned> LiveInRegs;
84 std::set<unsigned> LiveOutRegs;
85
Eugene Zelenko66203762017-01-21 00:53:49 +000086 bool Scheduled = false;
87 bool HighLatencyBlock = false;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000088
89 std::vector<unsigned> HasLowLatencyNonWaitedParent;
90
91 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
92 unsigned ID;
93
94 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
95 std::vector<SIScheduleBlock*> Succs; // All blocks successors.
Eugene Zelenko66203762017-01-21 00:53:49 +000096 unsigned NumHighLatencySuccessors = 0;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000097
98public:
99 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
100 unsigned ID):
Eugene Zelenko66203762017-01-21 00:53:49 +0000101 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000102
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000103 ~SIScheduleBlock() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000104
105 unsigned getID() const { return ID; }
106
107 /// Functions for Block construction.
108 void addUnit(SUnit *SU);
109
110 // When all SUs have been added.
111 void finalizeUnits();
112
113 // Add block pred, which has instruction predecessor of SU.
114 void addPred(SIScheduleBlock *Pred);
115 void addSucc(SIScheduleBlock *Succ);
116
117 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
118 const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
119
120 unsigned Height; // Maximum topdown path length to block without outputs
121 unsigned Depth; // Maximum bottomup path length to block without inputs
122
123 unsigned getNumHighLatencySuccessors() const {
124 return NumHighLatencySuccessors;
125 }
126
127 bool isHighLatencyBlock() { return HighLatencyBlock; }
128
129 // This is approximative.
130 // Ideally should take into accounts some instructions (rcp, etc)
131 // are 4 times slower.
132 int getCost() { return SUnits.size(); }
133
134 // The block Predecessors and Successors must be all registered
135 // before fastSchedule().
136 // Fast schedule with no particular requirement.
137 void fastSchedule();
138
139 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
140
141 // Complete schedule that will try to minimize reg pressure and
142 // low latencies, and will fill liveins and liveouts.
143 // Needs all MIs to be grouped between BeginBlock and EndBlock.
144 // The MIs can be moved after the scheduling,
145 // it is just used to allow correct track of live registers.
146 void schedule(MachineBasicBlock::iterator BeginBlock,
147 MachineBasicBlock::iterator EndBlock);
148
149 bool isScheduled() { return Scheduled; }
150
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000151 // Needs the block to be scheduled inside
152 // TODO: find a way to compute it.
153 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
154 return InternalAdditionnalPressure;
155 }
156
157 std::set<unsigned> &getInRegs() { return LiveInRegs; }
158 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
159
160 void printDebug(bool Full);
161
162private:
163 struct SISchedCandidate : SISchedulerCandidate {
164 // The best SUnit candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000165 SUnit *SU = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000166
167 unsigned SGPRUsage;
168 unsigned VGPRUsage;
169 bool IsLowLatency;
170 unsigned LowLatencyOffset;
171 bool HasLowLatencyNonWaitedParent;
172
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000173 SISchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000174
175 bool isValid() const { return SU; }
176
177 // Copy the status of another candidate without changing policy.
178 void setBest(SISchedCandidate &Best) {
179 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
180 SU = Best.SU;
181 Reason = Best.Reason;
182 SGPRUsage = Best.SGPRUsage;
183 VGPRUsage = Best.VGPRUsage;
184 IsLowLatency = Best.IsLowLatency;
185 LowLatencyOffset = Best.LowLatencyOffset;
186 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
187 }
188 };
189
190 void undoSchedule();
191
192 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
193 void releaseSucc(SUnit *SU, SDep *SuccEdge);
194 // InOrOutBlock: restrict to links pointing inside the block (true),
195 // or restrict to links pointing outside the block (false).
196 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
197
198 void nodeScheduled(SUnit *SU);
199 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
200 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
201 SUnit* pickNode();
202 void traceCandidate(const SISchedCandidate &Cand);
203 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
204 MachineBasicBlock::iterator EndBlock);
205};
206
207struct SIScheduleBlocks {
208 std::vector<SIScheduleBlock*> Blocks;
209 std::vector<int> TopDownIndex2Block;
210 std::vector<int> TopDownBlock2Index;
211};
212
213enum SISchedulerBlockCreatorVariant {
Eugene Zelenko66203762017-01-21 00:53:49 +0000214 LatenciesAlone,
215 LatenciesGrouped,
216 LatenciesAlonePlusConsecutive
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000217};
218
219class SIScheduleBlockCreator {
220 SIScheduleDAGMI *DAG;
221 // unique_ptr handles freeing memory for us.
222 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
223 std::map<SISchedulerBlockCreatorVariant,
224 SIScheduleBlocks> Blocks;
225 std::vector<SIScheduleBlock*> CurrentBlocks;
226 std::vector<int> Node2CurrentBlock;
227
228 // Topological sort
229 // Maps topological index to the node number.
230 std::vector<int> TopDownIndex2Block;
231 std::vector<int> TopDownBlock2Index;
232 std::vector<int> BottomUpIndex2Block;
233
234 // 0 -> Color not given.
235 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
236 // Above -> Other groups.
237 int NextReservedID;
238 int NextNonReservedID;
239 std::vector<int> CurrentColoring;
240 std::vector<int> CurrentTopDownReservedDependencyColoring;
241 std::vector<int> CurrentBottomUpReservedDependencyColoring;
242
243public:
244 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
245 ~SIScheduleBlockCreator();
246
247 SIScheduleBlocks
248 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
249
250 bool isSUInBlock(SUnit *SU, unsigned ID);
251
252private:
253 // Give a Reserved color to every high latency.
254 void colorHighLatenciesAlone();
255
256 // Create groups of high latencies with a Reserved color.
257 void colorHighLatenciesGroups();
258
259 // Compute coloring for topdown and bottom traversals with
260 // different colors depending on dependencies on Reserved colors.
261 void colorComputeReservedDependencies();
262
263 // Give color to all non-colored SUs according to Reserved groups dependencies.
264 void colorAccordingToReservedDependencies();
265
266 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
267 // The new colors are computed according to the dependencies on the other blocks
268 // formed with colorAccordingToReservedDependencies.
269 void colorEndsAccordingToDependencies();
270
271 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
272 void colorForceConsecutiveOrderInGroup();
273
274 // Merge Constant loads that have all their users into another group to the group.
275 // (TODO: else if all their users depend on the same group, put them there)
276 void colorMergeConstantLoadsNextGroup();
277
278 // Merge SUs that have all their users into another group to the group
279 void colorMergeIfPossibleNextGroup();
280
281 // Merge SUs that have all their users into another group to the group,
282 // but only for Reserved groups.
283 void colorMergeIfPossibleNextGroupOnlyForReserved();
284
285 // Merge SUs that have all their users into another group to the group,
286 // but only if the group is no more than a few SUs.
287 void colorMergeIfPossibleSmallGroupsToNextGroup();
288
289 // Divides Blocks with important size.
290 // Idea of implementation: attribute new colors depending on topdown and
291 // bottom up links to other blocks.
292 void cutHugeBlocks();
293
294 // Put in one group all instructions with no users in this scheduling region
295 // (we'd want these groups be at the end).
296 void regroupNoUserInstructions();
297
298 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
299
300 void topologicalSort();
301
302 void scheduleInsideBlocks();
303
304 void fillStats();
305};
306
307enum SISchedulerBlockSchedulerVariant {
308 BlockLatencyRegUsage,
309 BlockRegUsageLatency,
310 BlockRegUsage
311};
312
313class SIScheduleBlockScheduler {
314 SIScheduleDAGMI *DAG;
315 SISchedulerBlockSchedulerVariant Variant;
316 std::vector<SIScheduleBlock*> Blocks;
317
318 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
319 std::set<unsigned> LiveRegs;
320 // Num of schedulable unscheduled blocks reading the register.
321 std::map<unsigned, unsigned> LiveRegsConsumers;
322
323 std::vector<unsigned> LastPosHighLatencyParentScheduled;
324 int LastPosWaitedHighLatency;
325
326 std::vector<SIScheduleBlock*> BlocksScheduled;
327 unsigned NumBlockScheduled;
328 std::vector<SIScheduleBlock*> ReadyBlocks;
329
330 unsigned VregCurrentUsage;
331 unsigned SregCurrentUsage;
332
333 // Currently is only approximation.
334 unsigned maxVregUsage;
335 unsigned maxSregUsage;
336
337 std::vector<unsigned> BlockNumPredsLeft;
338 std::vector<unsigned> BlockNumSuccsLeft;
339
340public:
341 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
342 SISchedulerBlockSchedulerVariant Variant,
343 SIScheduleBlocks BlocksStruct);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000344 ~SIScheduleBlockScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000345
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000346 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000347
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000348 unsigned getVGPRUsage() { return maxVregUsage; }
349 unsigned getSGPRUsage() { return maxSregUsage; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000350
351private:
352 struct SIBlockSchedCandidate : SISchedulerCandidate {
353 // The best Block candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000354 SIScheduleBlock *Block = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000355
356 bool IsHighLatency;
357 int VGPRUsageDiff;
358 unsigned NumSuccessors;
359 unsigned NumHighLatencySuccessors;
360 unsigned LastPosHighLatParentScheduled;
361 unsigned Height;
362
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000363 SIBlockSchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000364
365 bool isValid() const { return Block; }
366
367 // Copy the status of another candidate without changing policy.
368 void setBest(SIBlockSchedCandidate &Best) {
369 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
370 Block = Best.Block;
371 Reason = Best.Reason;
372 IsHighLatency = Best.IsHighLatency;
373 VGPRUsageDiff = Best.VGPRUsageDiff;
374 NumSuccessors = Best.NumSuccessors;
375 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
376 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
377 Height = Best.Height;
378 }
379 };
380
381 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
382 SIBlockSchedCandidate &TryCand);
383 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
384 SIBlockSchedCandidate &TryCand);
385 SIScheduleBlock *pickBlock();
386
387 void addLiveRegs(std::set<unsigned> &Regs);
388 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
389 void releaseBlockSuccs(SIScheduleBlock *Parent);
390 void blockScheduled(SIScheduleBlock *Block);
391
392 // Check register pressure change
393 // by scheduling a block with these LiveIn and LiveOut.
394 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
395 std::set<unsigned> &OutRegs);
396
397 void schedule();
398};
399
400struct SIScheduleBlockResult {
401 std::vector<unsigned> SUs;
402 unsigned MaxSGPRUsage;
403 unsigned MaxVGPRUsage;
404};
405
406class SIScheduler {
407 SIScheduleDAGMI *DAG;
408 SIScheduleBlockCreator BlockCreator;
409
410public:
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000411 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000412
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000413 ~SIScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000414
415 struct SIScheduleBlockResult
416 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
417 SISchedulerBlockSchedulerVariant ScheduleVariant);
418};
419
Matt Arsenault6b6a2c32016-03-11 08:00:27 +0000420class SIScheduleDAGMI final : public ScheduleDAGMILive {
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000421 const SIInstrInfo *SITII;
422 const SIRegisterInfo *SITRI;
423
424 std::vector<SUnit> SUnitsLinksBackup;
425
426 // For moveLowLatencies. After all Scheduling variants are tested.
427 std::vector<unsigned> ScheduledSUnits;
428 std::vector<unsigned> ScheduledSUnitsInv;
429
430 unsigned VGPRSetID;
431 unsigned SGPRSetID;
432
433public:
434 SIScheduleDAGMI(MachineSchedContext *C);
435
436 ~SIScheduleDAGMI() override;
437
438 // Entry point for the schedule.
439 void schedule() override;
440
441 // To init Block's RPTracker.
442 void initRPTracker(RegPressureTracker &RPTracker) {
Alexander Timofeev9f61fea2017-02-14 14:29:05 +0000443 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000444 }
445
446 MachineBasicBlock *getBB() { return BB; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000447 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
448 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000449 LiveIntervals *getLIS() { return LIS; }
450 MachineRegisterInfo *getMRI() { return &MRI; }
451 const TargetRegisterInfo *getTRI() { return TRI; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000452 SUnit& getEntrySU() { return EntrySU; }
453 SUnit& getExitSU() { return ExitSU; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000454
455 void restoreSULinksLeft();
456
457 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
458 _Iterator End,
459 unsigned &VgprUsage,
460 unsigned &SgprUsage);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000461
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000462 std::set<unsigned> getInRegs() {
Matthias Braun5d458612016-01-20 00:23:26 +0000463 std::set<unsigned> InRegs;
464 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
465 InRegs.insert(RegMaskPair.RegUnit);
466 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000467 return InRegs;
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000468 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000469
470 unsigned getVGPRSetID() const { return VGPRSetID; }
471 unsigned getSGPRSetID() const { return SGPRSetID; }
472
473private:
474 void topologicalSort();
475 // After scheduling is done, improve low latency placements.
476 void moveLowLatencies();
477
478public:
479 // Some stats for scheduling inside blocks.
480 std::vector<unsigned> IsLowLatencySU;
481 std::vector<unsigned> LowLatencyOffset;
482 std::vector<unsigned> IsHighLatencySU;
483 // Topological sort
484 // Maps topological index to the node number.
485 std::vector<int> TopDownIndex2SU;
486 std::vector<int> BottomUpIndex2SU;
487};
488
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000489} // end namespace llvm
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000490
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000491#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H