blob: d824e38504e6543b49b3698e93df44078de36c29 [file] [log] [blame]
Eugene Zelenko2bc2f332016-12-09 22:06:55 +00001//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
Nicolai Haehnle02c32912016-01-13 16:10:10 +00002//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief SI Machine Scheduler interface
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
16#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
17
18#include "SIInstrInfo.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000019#include "llvm/CodeGen/MachineBasicBlock.h"
Nicolai Haehnle02c32912016-01-13 16:10:10 +000020#include "llvm/CodeGen/MachineScheduler.h"
21#include "llvm/CodeGen/RegisterPressure.h"
Eugene Zelenko2bc2f332016-12-09 22:06:55 +000022#include "llvm/CodeGen/ScheduleDAG.h"
23#include <cassert>
24#include <cstdint>
25#include <map>
26#include <memory>
27#include <set>
28#include <vector>
Nicolai Haehnle02c32912016-01-13 16:10:10 +000029
30namespace llvm {
31
32enum SIScheduleCandReason {
33 NoCand,
34 RegUsage,
35 Latency,
36 Successor,
37 Depth,
38 NodeOrder
39};
40
41struct SISchedulerCandidate {
42 // The reason for this candidate.
Eugene Zelenko66203762017-01-21 00:53:49 +000043 SIScheduleCandReason Reason = NoCand;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000044
45 // Set of reasons that apply to multiple candidates.
Eugene Zelenko66203762017-01-21 00:53:49 +000046 uint32_t RepeatReasonSet = 0;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000047
Eugene Zelenko66203762017-01-21 00:53:49 +000048 SISchedulerCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000049
50 bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
51 void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
52};
53
54class SIScheduleDAGMI;
55class SIScheduleBlockCreator;
56
Valery Pykhtinfb990552017-03-27 18:22:39 +000057enum SIScheduleBlockLinkKind {
58 NoData,
59 Data
60};
61
Nicolai Haehnle02c32912016-01-13 16:10:10 +000062class SIScheduleBlock {
63 SIScheduleDAGMI *DAG;
64 SIScheduleBlockCreator *BC;
65
66 std::vector<SUnit*> SUnits;
67 std::map<unsigned, unsigned> NodeNum2Index;
68 std::vector<SUnit*> TopReadySUs;
69 std::vector<SUnit*> ScheduledSUnits;
70
71 /// The top of the unscheduled zone.
72 IntervalPressure TopPressure;
73 RegPressureTracker TopRPTracker;
74
75 // Pressure: number of said class of registers needed to
76 // store the live virtual and real registers.
77 // We do care only of SGPR32 and VGPR32 and do track only virtual registers.
78 // Pressure of additional registers required inside the block.
79 std::vector<unsigned> InternalAdditionnalPressure;
80 // Pressure of input and output registers
81 std::vector<unsigned> LiveInPressure;
82 std::vector<unsigned> LiveOutPressure;
83 // Registers required by the block, and outputs.
84 // We do track only virtual registers.
85 // Note that some registers are not 32 bits,
86 // and thus the pressure is not equal
87 // to the number of live registers.
88 std::set<unsigned> LiveInRegs;
89 std::set<unsigned> LiveOutRegs;
90
Eugene Zelenko66203762017-01-21 00:53:49 +000091 bool Scheduled = false;
92 bool HighLatencyBlock = false;
Nicolai Haehnle02c32912016-01-13 16:10:10 +000093
94 std::vector<unsigned> HasLowLatencyNonWaitedParent;
95
96 // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
97 unsigned ID;
98
99 std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
Valery Pykhtinfb990552017-03-27 18:22:39 +0000100 // All blocks successors, and the kind of link
101 std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
Eugene Zelenko66203762017-01-21 00:53:49 +0000102 unsigned NumHighLatencySuccessors = 0;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000103
104public:
105 SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
106 unsigned ID):
Eugene Zelenko66203762017-01-21 00:53:49 +0000107 DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000108
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000109 ~SIScheduleBlock() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000110
111 unsigned getID() const { return ID; }
112
113 /// Functions for Block construction.
114 void addUnit(SUnit *SU);
115
116 // When all SUs have been added.
117 void finalizeUnits();
118
119 // Add block pred, which has instruction predecessor of SU.
120 void addPred(SIScheduleBlock *Pred);
Valery Pykhtinfb990552017-03-27 18:22:39 +0000121 void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000122
123 const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
Valery Pykhtinfb990552017-03-27 18:22:39 +0000124 ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
125 getSuccs() const { return Succs; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000126
127 unsigned Height; // Maximum topdown path length to block without outputs
128 unsigned Depth; // Maximum bottomup path length to block without inputs
129
130 unsigned getNumHighLatencySuccessors() const {
131 return NumHighLatencySuccessors;
132 }
133
134 bool isHighLatencyBlock() { return HighLatencyBlock; }
135
136 // This is approximative.
137 // Ideally should take into accounts some instructions (rcp, etc)
138 // are 4 times slower.
139 int getCost() { return SUnits.size(); }
140
141 // The block Predecessors and Successors must be all registered
142 // before fastSchedule().
143 // Fast schedule with no particular requirement.
144 void fastSchedule();
145
146 std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
147
148 // Complete schedule that will try to minimize reg pressure and
149 // low latencies, and will fill liveins and liveouts.
150 // Needs all MIs to be grouped between BeginBlock and EndBlock.
151 // The MIs can be moved after the scheduling,
152 // it is just used to allow correct track of live registers.
153 void schedule(MachineBasicBlock::iterator BeginBlock,
154 MachineBasicBlock::iterator EndBlock);
155
156 bool isScheduled() { return Scheduled; }
157
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000158 // Needs the block to be scheduled inside
159 // TODO: find a way to compute it.
160 std::vector<unsigned> &getInternalAdditionnalRegUsage() {
161 return InternalAdditionnalPressure;
162 }
163
164 std::set<unsigned> &getInRegs() { return LiveInRegs; }
165 std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
166
167 void printDebug(bool Full);
168
169private:
170 struct SISchedCandidate : SISchedulerCandidate {
171 // The best SUnit candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000172 SUnit *SU = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000173
174 unsigned SGPRUsage;
175 unsigned VGPRUsage;
176 bool IsLowLatency;
177 unsigned LowLatencyOffset;
178 bool HasLowLatencyNonWaitedParent;
179
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000180 SISchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000181
182 bool isValid() const { return SU; }
183
184 // Copy the status of another candidate without changing policy.
185 void setBest(SISchedCandidate &Best) {
186 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
187 SU = Best.SU;
188 Reason = Best.Reason;
189 SGPRUsage = Best.SGPRUsage;
190 VGPRUsage = Best.VGPRUsage;
191 IsLowLatency = Best.IsLowLatency;
192 LowLatencyOffset = Best.LowLatencyOffset;
193 HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
194 }
195 };
196
197 void undoSchedule();
198
199 void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
200 void releaseSucc(SUnit *SU, SDep *SuccEdge);
201 // InOrOutBlock: restrict to links pointing inside the block (true),
202 // or restrict to links pointing outside the block (false).
203 void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
204
205 void nodeScheduled(SUnit *SU);
206 void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
207 void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
208 SUnit* pickNode();
209 void traceCandidate(const SISchedCandidate &Cand);
210 void initRegPressure(MachineBasicBlock::iterator BeginBlock,
211 MachineBasicBlock::iterator EndBlock);
212};
213
214struct SIScheduleBlocks {
215 std::vector<SIScheduleBlock*> Blocks;
216 std::vector<int> TopDownIndex2Block;
217 std::vector<int> TopDownBlock2Index;
218};
219
220enum SISchedulerBlockCreatorVariant {
Eugene Zelenko66203762017-01-21 00:53:49 +0000221 LatenciesAlone,
222 LatenciesGrouped,
223 LatenciesAlonePlusConsecutive
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000224};
225
226class SIScheduleBlockCreator {
227 SIScheduleDAGMI *DAG;
228 // unique_ptr handles freeing memory for us.
229 std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
230 std::map<SISchedulerBlockCreatorVariant,
231 SIScheduleBlocks> Blocks;
232 std::vector<SIScheduleBlock*> CurrentBlocks;
233 std::vector<int> Node2CurrentBlock;
234
235 // Topological sort
236 // Maps topological index to the node number.
237 std::vector<int> TopDownIndex2Block;
238 std::vector<int> TopDownBlock2Index;
239 std::vector<int> BottomUpIndex2Block;
240
241 // 0 -> Color not given.
242 // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
243 // Above -> Other groups.
244 int NextReservedID;
245 int NextNonReservedID;
246 std::vector<int> CurrentColoring;
247 std::vector<int> CurrentTopDownReservedDependencyColoring;
248 std::vector<int> CurrentBottomUpReservedDependencyColoring;
249
250public:
251 SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
252 ~SIScheduleBlockCreator();
253
254 SIScheduleBlocks
255 getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
256
257 bool isSUInBlock(SUnit *SU, unsigned ID);
258
259private:
260 // Give a Reserved color to every high latency.
261 void colorHighLatenciesAlone();
262
263 // Create groups of high latencies with a Reserved color.
264 void colorHighLatenciesGroups();
265
266 // Compute coloring for topdown and bottom traversals with
267 // different colors depending on dependencies on Reserved colors.
268 void colorComputeReservedDependencies();
269
270 // Give color to all non-colored SUs according to Reserved groups dependencies.
271 void colorAccordingToReservedDependencies();
272
273 // Divides Blocks having no bottom up or top down dependencies on Reserved groups.
274 // The new colors are computed according to the dependencies on the other blocks
275 // formed with colorAccordingToReservedDependencies.
276 void colorEndsAccordingToDependencies();
277
278 // Cut groups into groups with SUs in consecutive order (except for Reserved groups).
279 void colorForceConsecutiveOrderInGroup();
280
281 // Merge Constant loads that have all their users into another group to the group.
282 // (TODO: else if all their users depend on the same group, put them there)
283 void colorMergeConstantLoadsNextGroup();
284
285 // Merge SUs that have all their users into another group to the group
286 void colorMergeIfPossibleNextGroup();
287
288 // Merge SUs that have all their users into another group to the group,
289 // but only for Reserved groups.
290 void colorMergeIfPossibleNextGroupOnlyForReserved();
291
292 // Merge SUs that have all their users into another group to the group,
293 // but only if the group is no more than a few SUs.
294 void colorMergeIfPossibleSmallGroupsToNextGroup();
295
296 // Divides Blocks with important size.
297 // Idea of implementation: attribute new colors depending on topdown and
298 // bottom up links to other blocks.
299 void cutHugeBlocks();
300
301 // Put in one group all instructions with no users in this scheduling region
302 // (we'd want these groups be at the end).
303 void regroupNoUserInstructions();
304
Marek Olsake6f74382017-07-25 20:36:58 +0000305 // Give Reserved color to export instructions
306 void colorExports();
307
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000308 void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
309
310 void topologicalSort();
311
312 void scheduleInsideBlocks();
313
314 void fillStats();
315};
316
317enum SISchedulerBlockSchedulerVariant {
318 BlockLatencyRegUsage,
319 BlockRegUsageLatency,
320 BlockRegUsage
321};
322
323class SIScheduleBlockScheduler {
324 SIScheduleDAGMI *DAG;
325 SISchedulerBlockSchedulerVariant Variant;
326 std::vector<SIScheduleBlock*> Blocks;
327
328 std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
329 std::set<unsigned> LiveRegs;
330 // Num of schedulable unscheduled blocks reading the register.
331 std::map<unsigned, unsigned> LiveRegsConsumers;
332
333 std::vector<unsigned> LastPosHighLatencyParentScheduled;
334 int LastPosWaitedHighLatency;
335
336 std::vector<SIScheduleBlock*> BlocksScheduled;
337 unsigned NumBlockScheduled;
338 std::vector<SIScheduleBlock*> ReadyBlocks;
339
340 unsigned VregCurrentUsage;
341 unsigned SregCurrentUsage;
342
343 // Currently is only approximation.
344 unsigned maxVregUsage;
345 unsigned maxSregUsage;
346
347 std::vector<unsigned> BlockNumPredsLeft;
348 std::vector<unsigned> BlockNumSuccsLeft;
349
350public:
351 SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
352 SISchedulerBlockSchedulerVariant Variant,
353 SIScheduleBlocks BlocksStruct);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000354 ~SIScheduleBlockScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000355
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000356 std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000357
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000358 unsigned getVGPRUsage() { return maxVregUsage; }
359 unsigned getSGPRUsage() { return maxSregUsage; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000360
361private:
362 struct SIBlockSchedCandidate : SISchedulerCandidate {
363 // The best Block candidate.
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000364 SIScheduleBlock *Block = nullptr;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000365
366 bool IsHighLatency;
367 int VGPRUsageDiff;
368 unsigned NumSuccessors;
369 unsigned NumHighLatencySuccessors;
370 unsigned LastPosHighLatParentScheduled;
371 unsigned Height;
372
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000373 SIBlockSchedCandidate() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000374
375 bool isValid() const { return Block; }
376
377 // Copy the status of another candidate without changing policy.
378 void setBest(SIBlockSchedCandidate &Best) {
379 assert(Best.Reason != NoCand && "uninitialized Sched candidate");
380 Block = Best.Block;
381 Reason = Best.Reason;
382 IsHighLatency = Best.IsHighLatency;
383 VGPRUsageDiff = Best.VGPRUsageDiff;
384 NumSuccessors = Best.NumSuccessors;
385 NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
386 LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
387 Height = Best.Height;
388 }
389 };
390
391 bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
392 SIBlockSchedCandidate &TryCand);
393 bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
394 SIBlockSchedCandidate &TryCand);
395 SIScheduleBlock *pickBlock();
396
397 void addLiveRegs(std::set<unsigned> &Regs);
398 void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
399 void releaseBlockSuccs(SIScheduleBlock *Parent);
400 void blockScheduled(SIScheduleBlock *Block);
401
402 // Check register pressure change
403 // by scheduling a block with these LiveIn and LiveOut.
404 std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
405 std::set<unsigned> &OutRegs);
406
407 void schedule();
408};
409
410struct SIScheduleBlockResult {
411 std::vector<unsigned> SUs;
412 unsigned MaxSGPRUsage;
413 unsigned MaxVGPRUsage;
414};
415
416class SIScheduler {
417 SIScheduleDAGMI *DAG;
418 SIScheduleBlockCreator BlockCreator;
419
420public:
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000421 SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000422
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000423 ~SIScheduler() = default;
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000424
425 struct SIScheduleBlockResult
426 scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
427 SISchedulerBlockSchedulerVariant ScheduleVariant);
428};
429
Matt Arsenault6b6a2c32016-03-11 08:00:27 +0000430class SIScheduleDAGMI final : public ScheduleDAGMILive {
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000431 const SIInstrInfo *SITII;
432 const SIRegisterInfo *SITRI;
433
434 std::vector<SUnit> SUnitsLinksBackup;
435
436 // For moveLowLatencies. After all Scheduling variants are tested.
437 std::vector<unsigned> ScheduledSUnits;
438 std::vector<unsigned> ScheduledSUnitsInv;
439
440 unsigned VGPRSetID;
441 unsigned SGPRSetID;
442
443public:
444 SIScheduleDAGMI(MachineSchedContext *C);
445
446 ~SIScheduleDAGMI() override;
447
448 // Entry point for the schedule.
449 void schedule() override;
450
451 // To init Block's RPTracker.
452 void initRPTracker(RegPressureTracker &RPTracker) {
Alexander Timofeev9f61fea2017-02-14 14:29:05 +0000453 RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin, false, false);
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000454 }
455
456 MachineBasicBlock *getBB() { return BB; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000457 MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
458 MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000459 LiveIntervals *getLIS() { return LIS; }
460 MachineRegisterInfo *getMRI() { return &MRI; }
461 const TargetRegisterInfo *getTRI() { return TRI; }
Valery Pykhtin9f3eca92017-03-28 07:19:48 +0000462 ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000463 SUnit& getEntrySU() { return EntrySU; }
464 SUnit& getExitSU() { return ExitSU; }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000465
466 void restoreSULinksLeft();
467
468 template<typename _Iterator> void fillVgprSgprCost(_Iterator First,
469 _Iterator End,
470 unsigned &VgprUsage,
471 unsigned &SgprUsage);
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000472
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000473 std::set<unsigned> getInRegs() {
Matthias Braun5d458612016-01-20 00:23:26 +0000474 std::set<unsigned> InRegs;
475 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
476 InRegs.insert(RegMaskPair.RegUnit);
477 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000478 return InRegs;
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000479 }
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000480
Valery Pykhtinf70f6832017-03-27 17:06:36 +0000481 std::set<unsigned> getOutRegs() {
482 std::set<unsigned> OutRegs;
483 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
484 OutRegs.insert(RegMaskPair.RegUnit);
485 }
486 return OutRegs;
487 };
488
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000489 unsigned getVGPRSetID() const { return VGPRSetID; }
490 unsigned getSGPRSetID() const { return SGPRSetID; }
491
492private:
493 void topologicalSort();
494 // After scheduling is done, improve low latency placements.
495 void moveLowLatencies();
496
497public:
498 // Some stats for scheduling inside blocks.
499 std::vector<unsigned> IsLowLatencySU;
500 std::vector<unsigned> LowLatencyOffset;
501 std::vector<unsigned> IsHighLatencySU;
502 // Topological sort
503 // Maps topological index to the node number.
504 std::vector<int> TopDownIndex2SU;
505 std::vector<int> BottomUpIndex2SU;
506};
507
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000508} // end namespace llvm
Nicolai Haehnle02c32912016-01-13 16:10:10 +0000509
Eugene Zelenko2bc2f332016-12-09 22:06:55 +0000510#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H