blob: c28a7be4d03a54f47d28df71b8a940fc6333fb38 [file] [log] [blame]
Eugene Zelenko2bc2f332016-12-09 22:06:55 +00001//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
Nicolai Haehnle02c32912016-01-13 16:10:10 +00002//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Nicolai Haehnle02c32912016-01-13 16:10:10 +00006//
7//===----------------------------------------------------------------------===//
8//
9/// \file
Adrian Prantl5f8f34e42018-05-01 15:54:18 +000010/// SI Machine Scheduler interface
Nicolai Haehnle02c32912016-01-13 16:10:10 +000011//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
15#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16
17#include "SIInstrInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000018#include "llvm/CodeGen/MachineBasicBlock.h"
Nicolai Haehnle02c32912016-01-13 16:10:10 +000019#include "llvm/CodeGen/MachineScheduler.h"
20#include "llvm/CodeGen/RegisterPressure.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000021#include "llvm/CodeGen/ScheduleDAG.h"
22#include <cassert>
23#include <cstdint>
24#include <map>
25#include <memory>
26#include <set>
27#include <vector>
Nicolai Haehnle02c32912016-01-13 16:10:10 +000028
29namespace llvm {
30
31enum SIScheduleCandReason {
32 NoCand,
33 RegUsage,
34 Latency,
35 Successor,
36 Depth,
37 NodeOrder
38};
39
40struct SISchedulerCandidate {
41 // The reason for this candidate.
Eugene Zelenko66203762017-01-21 00:53:49 +000042 SIScheduleCandReason Reason = NoCand;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000043
44 // Set of reasons that apply to multiple candidates.
Eugene Zelenko66203762017-01-21 00:53:49 +000045 uint32_t RepeatReasonSet = 0;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000046
Eugene Zelenko66203762017-01-21 00:53:49 +000047 SISchedulerCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000048
49 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
50 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
51};
52
53class SIScheduleDAGMI;
54class SIScheduleBlockCreator;
55
Valery Pykhtinfb990552017-03-27 18:22:39 +000056enum SIScheduleBlockLinkKind {
57 NoData,
58 Data
59};
60
Nicolai Haehnle02c32912016-01-13 16:10:10 +000061class SIScheduleBlock {
62 SIScheduleDAGMI *DAG;
63 SIScheduleBlockCreator *BC;
64
65 std::vector<SUnit*> SUnits;
66 std::map<unsigned, unsigned> NodeNum2Index;
67 std::vector<SUnit*> TopReadySUs;
68 std::vector<SUnit*> ScheduledSUnits;
69
70 /// The top of the unscheduled zone.
71 IntervalPressure TopPressure;
72 RegPressureTracker TopRPTracker;
73
74 // Pressure: number of said class of registers needed to
75 // store the live virtual and real registers.
76 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
77 // Pressure of additional registers required inside the block.
78 std::vector<unsigned> InternalAdditionnalPressure;
79 // Pressure of input and output registers
80 std::vector<unsigned> LiveInPressure;
81 std::vector<unsigned> LiveOutPressure;
82 // Registers required by the block, and outputs.
83 // We do track only virtual registers.
84 // Note that some registers are not 32 bits,
85 // and thus the pressure is not equal
86 // to the number of live registers.
87 std::set<unsigned> LiveInRegs;
88 std::set<unsigned> LiveOutRegs;
89
Eugene Zelenko66203762017-01-21 00:53:49 +000090 bool Scheduled = false;
91 bool HighLatencyBlock = false;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000092
93 std::vector<unsigned> HasLowLatencyNonWaitedParent;
94
95 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
96 unsigned ID;
97
98 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
Valery Pykhtinfb990552017-03-27 18:22:39 +000099 // All blocks successors, and the kind of link
100 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
Eugene Zelenko66203762017-01-21 00:53:49 +0000101 unsigned NumHighLatencySuccessors = 0;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000102
103public:
104 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
105 unsigned ID):
Eugene Zelenko66203762017-01-21 00:53:49 +0000106 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000107
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000108 ~SIScheduleBlock() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000109
110 unsigned getID() const { return ID; }
111
112 /// Functions for Block construction.
113 void addUnit(SUnit *SU);
114
115 // When all SUs have been added.
116 void finalizeUnits();
117
118 // Add block pred, which has instruction predecessor of SU.
119 void addPred(SIScheduleBlock *Pred);
Valery Pykhtinfb990552017-03-27 18:22:39 +0000120 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000121
122 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
Valery Pykhtinfb990552017-03-27 18:22:39 +0000123 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
124 getSuccs() const { return Succs; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000125
126 unsigned Height; // Maximum topdown path length to block without outputs
127 unsigned Depth; // Maximum bottomup path length to block without inputs
128
129 unsigned getNumHighLatencySuccessors() const {
130 return NumHighLatencySuccessors;
131 }
132
133 bool isHighLatencyBlock() { return HighLatencyBlock; }
134
135 // This is approximative.
136 // Ideally should take into accounts some instructions (rcp, etc)
137 // are 4 times slower.
138 int getCost() { return SUnits.size(); }
139
140 // The block Predecessors and Successors must be all registered
141 // before fastSchedule().
142 // Fast schedule with no particular requirement.
143 void fastSchedule();
144
145 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
146
147 // Complete schedule that will try to minimize reg pressure and
148 // low latencies, and will fill liveins and liveouts.
149 // Needs all MIs to be grouped between BeginBlock and EndBlock.
150 // The MIs can be moved after the scheduling,
151 // it is just used to allow correct track of live registers.
152 void schedule(MachineBasicBlock::iterator BeginBlock,
153 MachineBasicBlock::iterator EndBlock);
154
155 bool isScheduled() { return Scheduled; }
156
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000157 // Needs the block to be scheduled inside
158 // TODO: find a way to compute it.
159 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
160 return InternalAdditionnalPressure;
161 }
162
163 std::set<unsigned> &getInRegs() { return LiveInRegs; }
164 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
165
166 void printDebug(bool Full);
167
168private:
169 struct SISchedCandidate : SISchedulerCandidate {
170 // The best SUnit candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000171 SUnit *SU = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000172
173 unsigned SGPRUsage;
174 unsigned VGPRUsage;
175 bool IsLowLatency;
176 unsigned LowLatencyOffset;
177 bool HasLowLatencyNonWaitedParent;
178
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000179 SISchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000180
181 bool isValid() const { return SU; }
182
183 // Copy the status of another candidate without changing policy.
184 void setBest(SISchedCandidate &Best) {
185 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
186 SU = Best.SU;
187 Reason = Best.Reason;
188 SGPRUsage = Best.SGPRUsage;
189 VGPRUsage = Best.VGPRUsage;
190 IsLowLatency = Best.IsLowLatency;
191 LowLatencyOffset = Best.LowLatencyOffset;
192 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
193 }
194 };
195
196 void undoSchedule();
197
198 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
199 void releaseSucc(SUnit *SU, SDep *SuccEdge);
200 // InOrOutBlock: restrict to links pointing inside the block (true),
201 // or restrict to links pointing outside the block (false).
202 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
203
204 void nodeScheduled(SUnit *SU);
205 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
206 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207 SUnit* pickNode();
208 void traceCandidate(const SISchedCandidate &Cand);
209 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
210 MachineBasicBlock::iterator EndBlock);
211};
212
213struct SIScheduleBlocks {
214 std::vector<SIScheduleBlock*> Blocks;
215 std::vector<int> TopDownIndex2Block;
216 std::vector<int> TopDownBlock2Index;
217};
218
219enum SISchedulerBlockCreatorVariant {
Eugene Zelenko66203762017-01-21 00:53:49 +0000220 LatenciesAlone,
221 LatenciesGrouped,
222 LatenciesAlonePlusConsecutive
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000223};
224
225class SIScheduleBlockCreator {
226 SIScheduleDAGMI *DAG;
227 // unique_ptr handles freeing memory for us.
228 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
229 std::map<SISchedulerBlockCreatorVariant,
230 SIScheduleBlocks> Blocks;
231 std::vector<SIScheduleBlock*> CurrentBlocks;
232 std::vector<int> Node2CurrentBlock;
233
234 // Topological sort
235 // Maps topological index to the node number.
236 std::vector<int> TopDownIndex2Block;
237 std::vector<int> TopDownBlock2Index;
238 std::vector<int> BottomUpIndex2Block;
239
240 // 0 -> Color not given.
241 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
242 // Above -> Other groups.
243 int NextReservedID;
244 int NextNonReservedID;
245 std::vector<int> CurrentColoring;
246 std::vector<int> CurrentTopDownReservedDependencyColoring;
247 std::vector<int> CurrentBottomUpReservedDependencyColoring;
248
249public:
250 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
251 ~SIScheduleBlockCreator();
252
253 SIScheduleBlocks
254 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
255
256 bool isSUInBlock(SUnit *SU, unsigned ID);
257
258private:
259 // Give a Reserved color to every high latency.
260 void colorHighLatenciesAlone();
261
262 // Create groups of high latencies with a Reserved color.
263 void colorHighLatenciesGroups();
264
265 // Compute coloring for topdown and bottom traversals with
266 // different colors depending on dependencies on Reserved colors.
267 void colorComputeReservedDependencies();
268
269 // Give color to all non-colored SUs according to Reserved groups dependencies.
270 void colorAccordingToReservedDependencies();
271
272 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
273 // The new colors are computed according to the dependencies on the other blocks
274 // formed with colorAccordingToReservedDependencies.
275 void colorEndsAccordingToDependencies();
276
277 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
278 void colorForceConsecutiveOrderInGroup();
279
280 // Merge Constant loads that have all their users into another group to the group.
281 // (TODO: else if all their users depend on the same group, put them there)
282 void colorMergeConstantLoadsNextGroup();
283
284 // Merge SUs that have all their users into another group to the group
285 void colorMergeIfPossibleNextGroup();
286
287 // Merge SUs that have all their users into another group to the group,
288 // but only for Reserved groups.
289 void colorMergeIfPossibleNextGroupOnlyForReserved();
290
291 // Merge SUs that have all their users into another group to the group,
292 // but only if the group is no more than a few SUs.
293 void colorMergeIfPossibleSmallGroupsToNextGroup();
294
295 // Divides Blocks with important size.
296 // Idea of implementation: attribute new colors depending on topdown and
297 // bottom up links to other blocks.
298 void cutHugeBlocks();
299
300 // Put in one group all instructions with no users in this scheduling region
301 // (we'd want these groups be at the end).
302 void regroupNoUserInstructions();
303
Marek Olsake6f74382017-07-25 20:36:58 +0000304 // Give Reserved color to export instructions
305 void colorExports();
306
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000307 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
308
309 void topologicalSort();
310
311 void scheduleInsideBlocks();
312
313 void fillStats();
314};
315
316enum SISchedulerBlockSchedulerVariant {
317 BlockLatencyRegUsage,
318 BlockRegUsageLatency,
319 BlockRegUsage
320};
321
322class SIScheduleBlockScheduler {
323 SIScheduleDAGMI *DAG;
324 SISchedulerBlockSchedulerVariant Variant;
325 std::vector<SIScheduleBlock*> Blocks;
326
327 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
328 std::set<unsigned> LiveRegs;
329 // Num of schedulable unscheduled blocks reading the register.
330 std::map<unsigned, unsigned> LiveRegsConsumers;
331
332 std::vector<unsigned> LastPosHighLatencyParentScheduled;
333 int LastPosWaitedHighLatency;
334
335 std::vector<SIScheduleBlock*> BlocksScheduled;
336 unsigned NumBlockScheduled;
337 std::vector<SIScheduleBlock*> ReadyBlocks;
338
339 unsigned VregCurrentUsage;
340 unsigned SregCurrentUsage;
341
342 // Currently is only approximation.
343 unsigned maxVregUsage;
344 unsigned maxSregUsage;
345
346 std::vector<unsigned> BlockNumPredsLeft;
347 std::vector<unsigned> BlockNumSuccsLeft;
348
349public:
350 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
351 SISchedulerBlockSchedulerVariant Variant,
352 SIScheduleBlocks BlocksStruct);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000353 ~SIScheduleBlockScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000354
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000355 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000356
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000357 unsigned getVGPRUsage() { return maxVregUsage; }
358 unsigned getSGPRUsage() { return maxSregUsage; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000359
360private:
361 struct SIBlockSchedCandidate : SISchedulerCandidate {
362 // The best Block candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000363 SIScheduleBlock *Block = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000364
365 bool IsHighLatency;
366 int VGPRUsageDiff;
367 unsigned NumSuccessors;
368 unsigned NumHighLatencySuccessors;
369 unsigned LastPosHighLatParentScheduled;
370 unsigned Height;
371
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000372 SIBlockSchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000373
374 bool isValid() const { return Block; }
375
376 // Copy the status of another candidate without changing policy.
377 void setBest(SIBlockSchedCandidate &Best) {
378 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
379 Block = Best.Block;
380 Reason = Best.Reason;
381 IsHighLatency = Best.IsHighLatency;
382 VGPRUsageDiff = Best.VGPRUsageDiff;
383 NumSuccessors = Best.NumSuccessors;
384 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
385 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
386 Height = Best.Height;
387 }
388 };
389
390 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
391 SIBlockSchedCandidate &TryCand);
392 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
393 SIBlockSchedCandidate &TryCand);
394 SIScheduleBlock *pickBlock();
395
396 void addLiveRegs(std::set<unsigned> &Regs);
397 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
398 void releaseBlockSuccs(SIScheduleBlock *Parent);
399 void blockScheduled(SIScheduleBlock *Block);
400
401 // Check register pressure change
402 // by scheduling a block with these LiveIn and LiveOut.
403 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
404 std::set<unsigned> &OutRegs);
405
406 void schedule();
407};
408
409struct SIScheduleBlockResult {
410 std::vector<unsigned> SUs;
411 unsigned MaxSGPRUsage;
412 unsigned MaxVGPRUsage;
413};
414
415class SIScheduler {
416 SIScheduleDAGMI *DAG;
417 SIScheduleBlockCreator BlockCreator;
418
419public:
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000420 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000421
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000422 ~SIScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000423
424 struct SIScheduleBlockResult
425 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
426 SISchedulerBlockSchedulerVariant ScheduleVariant);
427};
428
Matt Arsenault6b6a2c32016-03-11 08:00:27 +0000429class SIScheduleDAGMI final : public ScheduleDAGMILive {
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000430 const SIInstrInfo *SITII;
431 const SIRegisterInfo *SITRI;
432
433 std::vector<SUnit> SUnitsLinksBackup;
434
435 // For moveLowLatencies. After all Scheduling variants are tested.
436 std::vector<unsigned> ScheduledSUnits;
437 std::vector<unsigned> ScheduledSUnitsInv;
438
439 unsigned VGPRSetID;
440 unsigned SGPRSetID;
441
442public:
443 SIScheduleDAGMI(MachineSchedContext *C);
444
445 ~SIScheduleDAGMI() override;
446
447 // Entry point for the schedule.
448 void schedule() override;
449
450 // To init Block's RPTracker.
451 void initRPTracker(RegPressureTracker &RPTracker) {
Alexander Timofeev9f61fea2017-02-14 14:29:05 +0000452 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000453 }
454
455 MachineBasicBlock *getBB() { return BB; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000456 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
457 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000458 LiveIntervals *getLIS() { return LIS; }
459 MachineRegisterInfo *getMRI() { return &MRI; }
460 const TargetRegisterInfo *getTRI() { return TRI; }
Valery Pykhtin9f3eca92017-03-28 07:19:48 +0000461 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000462 SUnit& getEntrySU() { return EntrySU; }
463 SUnit& getExitSU() { return ExitSU; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000464
465 void restoreSULinksLeft();
466
467 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
468 _Iterator End,
469 unsigned &VgprUsage,
470 unsigned &SgprUsage);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000471
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000472 std::set<unsigned> getInRegs() {
Matthias Braun5d458612016-01-20 00:23:26 +0000473 std::set<unsigned> InRegs;
474 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
475 InRegs.insert(RegMaskPair.RegUnit);
476 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000477 return InRegs;
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000478 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000479
Valery Pykhtinf70f6832017-03-27 17:06:36 +0000480 std::set<unsigned> getOutRegs() {
481 std::set<unsigned> OutRegs;
482 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
483 OutRegs.insert(RegMaskPair.RegUnit);
484 }
485 return OutRegs;
486 };
487
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000488 unsigned getVGPRSetID() const { return VGPRSetID; }
489 unsigned getSGPRSetID() const { return SGPRSetID; }
490
491private:
492 void topologicalSort();
493 // After scheduling is done, improve low latency placements.
494 void moveLowLatencies();
495
496public:
497 // Some stats for scheduling inside blocks.
498 std::vector<unsigned> IsLowLatencySU;
499 std::vector<unsigned> LowLatencyOffset;
500 std::vector<unsigned> IsHighLatencySU;
501 // Topological sort
502 // Maps topological index to the node number.
503 std::vector<int> TopDownIndex2SU;
504 std::vector<int> BottomUpIndex2SU;
505};
506
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000507} // end namespace llvm
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000508
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000509#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H