//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include <set>
#include <iostream>

using namespace llvm;

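// Reset the per-region scheduling state and derive the clause size limits
// from the target generation.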
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  Available[IDAlu]->clear();
  Available[IDFetch]->clear();
  Available[IDOther]->clear();
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
  InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
    InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
  } else {
    InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
  }
}

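// Move every SUnit from QSrc to QDst, clearing its QSrc queue-id bit on the
// way.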
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
  if (QSrc->empty())
    return;
  for (ReadyQueue::iterator I = QSrc->begin(),
       E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    QDst->push(*I);
  }
  QSrc->clear();
}

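// Pick the next node to schedule. Instructions are emitted in clauses of a
// single kind (ALU, fetch, other); the current clause is extended until it
// hits its size limit or runs out of candidates, then another kind is tried.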
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = 0;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check if we might want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
                          (CurEmitted > InstKindLimit[CurInstKind]) ||
                          (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick any other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

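// Update the clause bookkeeping once SU has actually been scheduled,
// accounting for how many clause slots the instruction consumes.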
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  DEBUG(dbgs() << "scheduled: ");
  DEBUG(SU->dump(DAG));

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      // Literal operands consume extra clause space, so count them too.
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
           E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
  MoveUnits(Pending[IDOther], Available[IDOther]);
}

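// Classify a newly released node and park it in the matching Pending queue;
// it becomes available for picking when the queue is drained.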
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  int IK = getInstKind(SU);

  DEBUG(dbgs() << IK << " <= ");
  DEBUG(SU->dump(DAG));

  Pending[IK]->push(SU);
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}

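// Return true if Reg (physical or virtual) belongs to the given register
// class.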
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

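// Classify an ALU node: does it occupy a whole instruction group, is it tied
// to a specific X/Y/Z/W slot, can it go in any slot, or will it be discarded
// before emission?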
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
      // %vregX = COPY Tn_X is likely to be discarded in favor of an
      // assignment of Tn_X to %vregX, so don't consider it for scheduling.
      return AluDiscarded;
    } else if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL, so don't consider it for scheduling.
      return AluDiscarded;
    }
    // Other copies fall through to the generic classification below.
  default:
    break;
  }

  // Does the instruction take a whole instruction group?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

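// Map a node onto one of the three scheduling buckets: ALU, fetch (texture
// and vertex), or other.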
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT4_eg_pseudo:
  case AMDGPU::DOT4_r600_pseudo:
    return IDAlu;
  case AMDGPU::TEX_VTX_CONSTBUF:
  case AMDGPU::TEX_VTX_TEXBUF:
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    return IDFetch;
  default:
    DEBUG(
        dbgs() << "other inst: ";
        SU->dump(DAG);
    );
    return IDOther;
  }
}

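// Helper for isBundleable(): records which constant pair (SEL value with the
// channel bits masked off) an instruction reads through the X/Y channels and
// which through the Z/W channels. Instructions may only share an instruction
// group if these pairs are compatible.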
class ConstPairs {
private:
  unsigned XYPair;
  unsigned ZWPair;
public:
  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
    for (unsigned i = 0; i < 3; i++) {
      unsigned ReadConstChan = ReadConst[i] & 3;
      unsigned ReadConstIndex = ReadConst[i] & (~3);
      if (ReadConstChan < 2) {
        if (!XYPair) {
          XYPair = ReadConstIndex;
        }
      } else {
        if (!ZWPair) {
          ZWPair = ReadConstIndex;
        }
      }
    }
  }

  bool isCompatibleWith(const ConstPairs& CP) const {
    return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
        (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
  }
};

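// Collect the constant pairs read by an ALU instruction from its SRCn and
// SRCn_SEL operands; non-ALU instructions report no constant reads.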
static
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
  unsigned ReadConsts[3] = {0, 0, 0};
  R600Operands::Ops OpTable[3][2] = {
    {R600Operands::SRC0, R600Operands::SRC0_SEL},
    {R600Operands::SRC1, R600Operands::SRC1_SEL},
    {R600Operands::SRC2, R600Operands::SRC2_SEL},
  };

  if (!TII->isALUInstr(MI.getOpcode()))
    return ConstPairs(ReadConsts);

  for (unsigned i = 0; i < 3; i++) {
    int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
    if (SrcIdx < 0)
      break;
    if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
      ReadConsts[i] = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
  }
  return ConstPairs(ReadConsts);
}

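// Check that MI's constant reads are compatible with every instruction
// already selected for the current instruction group candidate.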
bool R600SchedStrategy::isBundleable(const MachineInstr& MI) {
  const ConstPairs &MIPair = getPairs(TII, MI);
  for (unsigned i = 0; i < 4; i++) {
    if (!InstructionsGroupCandidate[i])
      continue;
    const ConstPairs &IGPair = getPairs(TII,
        *InstructionsGroupCandidate[i]->getInstr());
    if (!IGPair.isCompatibleWith(MIPair))
      return false;
  }
  return true;
}

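// Pop the first node of Q that can be bundled with the current instruction
// group candidate, or return NULL if there is none.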
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
  if (Q.empty())
    return NULL;
  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
       E = Q.end(); It != E; ++It) {
    SUnit *SU = *It;
    if (isBundleable(*SU->getInstr())) {
      Q.erase(It);
      return SU;
    }
  }
  return NULL;
}

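// Drain Pending[IDAlu] into the per-kind AvailableAlus sets so that the slot
// filler can pick candidates by ALU kind.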
void R600SchedStrategy::LoadAlu() {
  ReadyQueue *QSrc = Pending[IDAlu];
  for (ReadyQueue::iterator I = QSrc->begin(),
       E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    AluKind AK = getAluKind(*I);
    AvailableAlus[AK].insert(*I);
  }
  QSrc->clear();
}

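// Start a new instruction group: clear the slot mask and the candidate list,
// then refill the ALU pools from the pending queue.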
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
  LoadAlu();
}

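// Constrain MI's destination register class so that its result is assigned
// to the channel matching Slot (0 = X, 1 = Y, 2 = Z, 3 = W).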
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is both def'd and used in the same
  // instruction and we try to constrain its regclass.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
       E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() && MO.getReg() == DestReg)
      return;
  }
  // Constrain the regclass of DestReg so that it is assigned to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

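// Try to fill Slot, choosing between a node already tied to that channel and
// an unconstrained (AluAny) node; if the unconstrained node wins it gets
// constrained to the slot.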
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
  if (!UnslotedSU) {
    return SlotedSU;
  } else if (!SlotedSU) {
    AssignSlot(UnslotedSU->getInstr(), Slot);
    return UnslotedSU;
  } else {
    // Determine which one to pick (the lesser one).
    if (CompareSUnit()(SlotedSU, UnslotedSU)) {
      AvailableAlus[AluAny].insert(UnslotedSU);
      return SlotedSU;
    } else {
      AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
      AssignSlot(UnslotedSU->getInstr(), Slot);
      return UnslotedSU;
    }
  }
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
}

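// Pick the next ALU node for the current instruction group: discarded copies
// and whole-group (XYZW) instructions are emitted on an empty group,
// otherwise the first free X/Y/Z/W slot is filled.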
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate[Chan] = SU;
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

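// Pick the next node from the given non-ALU queue, refilling it from the
// pending queue when it is empty.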
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = 0;
  ReadyQueue *AQ = Available[QID];

  if (AQ->empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ->empty()) {
    SU = *AQ->begin();
    AQ->remove(AQ->begin());
  }
  return SU;
}