blob: 9074364bb3a2391a7f6cf0bab253a375c73db887 [file] [log] [blame]
Vincent Lejeune62f38ca2013-03-05 18:41:32 +00001//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// \brief R600 Machine Scheduler interface
12// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "misched"
17
18#include "R600MachineScheduler.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/LiveIntervalAnalysis.h"
21#include "llvm/Pass.h"
22#include "llvm/PassManager.h"
NAKAMURA Takumi3f179b52013-03-11 08:19:28 +000023#include "llvm/Support/raw_ostream.h"
Vincent Lejeune62f38ca2013-03-05 18:41:32 +000024#include <set>
NAKAMURA Takumi3f179b52013-03-11 08:19:28 +000025
Vincent Lejeune62f38ca2013-03-05 18:41:32 +000026using namespace llvm;
27
28void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
29
30 DAG = dag;
31 TII = static_cast<const R600InstrInfo*>(DAG->TII);
32 TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
33 MRI = &DAG->MRI;
34 Available[IDAlu]->clear();
35 Available[IDFetch]->clear();
36 Available[IDOther]->clear();
37 CurInstKind = IDOther;
38 CurEmitted = 0;
39 OccupedSlotsMask = 15;
Vincent Lejeune62f38ca2013-03-05 18:41:32 +000040 InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
41
42
43 const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
44 if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
45 InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
46 } else {
47 InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
48 }
49}
50
51void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
52{
53 if (QSrc->empty())
54 return;
55 for (ReadyQueue::iterator I = QSrc->begin(),
56 E = QSrc->end(); I != E; ++I) {
57 (*I)->NodeQueueId &= ~QSrc->getID();
58 QDst->push(*I);
59 }
60 QSrc->clear();
61}
62
63SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
64 SUnit *SU = 0;
65 IsTopNode = true;
66 NextInstKind = IDOther;
67
68 // check if we might want to switch current clause type
69 bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
70 (CurEmitted > InstKindLimit[CurInstKind]) ||
71 (Available[CurInstKind]->empty());
72 bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
73 (!Available[IDFetch]->empty() || !Available[IDOther]->empty());
74
75 if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
76 (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
77 // try to pick ALU
78 SU = pickAlu();
79 if (SU) {
80 if (CurEmitted > InstKindLimit[IDAlu])
81 CurEmitted = 0;
82 NextInstKind = IDAlu;
83 }
84 }
85
86 if (!SU) {
87 // try to pick FETCH
88 SU = pickOther(IDFetch);
89 if (SU)
90 NextInstKind = IDFetch;
91 }
92
93 // try to pick other
94 if (!SU) {
95 SU = pickOther(IDOther);
96 if (SU)
97 NextInstKind = IDOther;
98 }
99
100 DEBUG(
101 if (SU) {
102 dbgs() << "picked node: ";
103 SU->dump(DAG);
104 } else {
105 dbgs() << "NO NODE ";
106 for (int i = 0; i < IDLast; ++i) {
107 Available[i]->dump();
108 Pending[i]->dump();
109 }
110 for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
111 const SUnit &S = DAG->SUnits[i];
112 if (!S.isScheduled)
113 S.dump(DAG);
114 }
115 }
116 );
117
118 return SU;
119}
120
121void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
122
123 DEBUG(dbgs() << "scheduled: ");
124 DEBUG(SU->dump(DAG));
125
126 if (NextInstKind != CurInstKind) {
127 DEBUG(dbgs() << "Instruction Type Switch\n");
128 if (NextInstKind != IDAlu)
129 OccupedSlotsMask = 15;
130 CurEmitted = 0;
131 CurInstKind = NextInstKind;
132 }
133
134 if (CurInstKind == IDAlu) {
135 switch (getAluKind(SU)) {
136 case AluT_XYZW:
137 CurEmitted += 4;
138 break;
139 case AluDiscarded:
140 break;
141 default: {
142 ++CurEmitted;
143 for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
144 E = SU->getInstr()->operands_end(); It != E; ++It) {
145 MachineOperand &MO = *It;
146 if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
147 ++CurEmitted;
148 }
149 }
150 }
151 } else {
152 ++CurEmitted;
153 }
154
155
156 DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
157
158 if (CurInstKind != IDFetch) {
159 MoveUnits(Pending[IDFetch], Available[IDFetch]);
160 }
161 MoveUnits(Pending[IDOther], Available[IDOther]);
162}
163
164void R600SchedStrategy::releaseTopNode(SUnit *SU) {
165 int IK = getInstKind(SU);
166
167 DEBUG(dbgs() << IK << " <= ");
168 DEBUG(SU->dump(DAG));
169
170 Pending[IK]->push(SU);
171}
172
/// Bottom-up release hook: intentionally empty, this strategy schedules
/// top-down only (pickNode always sets IsTopNode).
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}
175
176bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
177 const TargetRegisterClass *RC) const {
178 if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
179 return RC->contains(Reg);
180 } else {
181 return MRI->getRegClass(Reg) == RC;
182 }
183}
184
185R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
186 MachineInstr *MI = SU->getInstr();
187
188 switch (MI->getOpcode()) {
189 case AMDGPU::INTERP_PAIR_XY:
190 case AMDGPU::INTERP_PAIR_ZW:
191 case AMDGPU::INTERP_VEC_LOAD:
192 return AluT_XYZW;
193 case AMDGPU::COPY:
194 if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
195 // %vregX = COPY Tn_X is likely to be discarded in favor of an
196 // assignement of Tn_X to %vregX, don't considers it in scheduling
197 return AluDiscarded;
198 }
199 else if (MI->getOperand(1).isUndef()) {
200 // MI will become a KILL, don't considers it in scheduling
201 return AluDiscarded;
202 }
203 default:
204 break;
205 }
206
207 // Does the instruction take a whole IG ?
208 if(TII->isVector(*MI) ||
209 TII->isCubeOp(MI->getOpcode()) ||
210 TII->isReductionOp(MI->getOpcode()))
211 return AluT_XYZW;
212
213 // Is the result already assigned to a channel ?
214 unsigned DestSubReg = MI->getOperand(0).getSubReg();
215 switch (DestSubReg) {
216 case AMDGPU::sub0:
217 return AluT_X;
218 case AMDGPU::sub1:
219 return AluT_Y;
220 case AMDGPU::sub2:
221 return AluT_Z;
222 case AMDGPU::sub3:
223 return AluT_W;
224 default:
225 break;
226 }
227
228 // Is the result already member of a X/Y/Z/W class ?
229 unsigned DestReg = MI->getOperand(0).getReg();
230 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
231 regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
232 return AluT_X;
233 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
234 return AluT_Y;
235 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
236 return AluT_Z;
237 if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
238 return AluT_W;
239 if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
240 return AluT_XYZW;
241
242 return AluAny;
243
244}
245
/// Map SU's opcode onto the coarse scheduling categories used by this
/// strategy: IDAlu, IDFetch (texture/vertex fetch clauses) or IDOther.
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  // Pseudo/utility instructions that occupy ALU slots when lowered.
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT4_eg_pseudo:
  case AMDGPU::DOT4_r600_pseudo:
    return IDAlu;
  // Texture and vertex fetch instructions; these form FETCH clauses.
  case AMDGPU::TEX_VTX_CONSTBUF:
  case AMDGPU::TEX_VTX_TEXBUF:
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    return IDFetch;
  default:
    // Anything else (control flow, exports, ...) schedules as IDOther.
    DEBUG(
        dbgs() << "other inst: ";
        SU->dump(DAG);
    );
    return IDOther;
  }
}
289
Vincent Lejeune62f38ca2013-03-05 18:41:32 +0000290SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
291 if (Q.empty())
292 return NULL;
293 for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
294 It != E; ++It) {
295 SUnit *SU = *It;
Vincent Lejeune3ab0ba32013-03-14 15:50:45 +0000296 InstructionsGroupCandidate.push_back(SU->getInstr());
297 if (TII->canBundle(InstructionsGroupCandidate)) {
298 InstructionsGroupCandidate.pop_back();
Vincent Lejeune62f38ca2013-03-05 18:41:32 +0000299 Q.erase(It);
300 return SU;
Vincent Lejeune3ab0ba32013-03-14 15:50:45 +0000301 } else {
302 InstructionsGroupCandidate.pop_back();
Vincent Lejeune62f38ca2013-03-05 18:41:32 +0000303 }
304 }
305 return NULL;
306}
307
308void R600SchedStrategy::LoadAlu() {
309 ReadyQueue *QSrc = Pending[IDAlu];
310 for (ReadyQueue::iterator I = QSrc->begin(),
311 E = QSrc->end(); I != E; ++I) {
312 (*I)->NodeQueueId &= ~QSrc->getID();
313 AluKind AK = getAluKind(*I);
314 AvailableAlus[AK].insert(*I);
315 }
316 QSrc->clear();
317}
318
/// Begin a new instruction group: reset slot occupancy, drop the bundle
/// candidate list, and refill the per-slot queues from pending ALU work.
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert (OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  InstructionsGroupCandidate.clear();
  LoadAlu();
}
326
327void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
328 unsigned DestReg = MI->getOperand(0).getReg();
329 // PressureRegister crashes if an operand is def and used in the same inst
330 // and we try to constraint its regclass
331 for (MachineInstr::mop_iterator It = MI->operands_begin(),
332 E = MI->operands_end(); It != E; ++It) {
333 MachineOperand &MO = *It;
334 if (MO.isReg() && !MO.isDef() &&
335 MO.getReg() == MI->getOperand(0).getReg())
336 return;
337 }
338 // Constrains the regclass of DestReg to assign it to Slot
339 switch (Slot) {
340 case 0:
341 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
342 break;
343 case 1:
344 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
345 break;
346 case 2:
347 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
348 break;
349 case 3:
350 MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
351 break;
352 }
353}
354
355SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
356 static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
357 SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
358 SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
359 if (!UnslotedSU) {
360 return SlotedSU;
361 } else if (!SlotedSU) {
362 AssignSlot(UnslotedSU->getInstr(), Slot);
363 return UnslotedSU;
364 } else {
365 //Determine which one to pick (the lesser one)
366 if (CompareSUnit()(SlotedSU, UnslotedSU)) {
367 AvailableAlus[AluAny].insert(UnslotedSU);
368 return SlotedSU;
369 } else {
370 AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
371 AssignSlot(UnslotedSU->getInstr(), Slot);
372 return UnslotedSU;
373 }
374 }
375}
376
377bool R600SchedStrategy::isAvailablesAluEmpty() const {
378 return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
379 AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
380 AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
381 AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
382}
383
384SUnit* R600SchedStrategy::pickAlu() {
385 while (!isAvailablesAluEmpty()) {
386 if (!OccupedSlotsMask) {
387 // Flush physical reg copies (RA will discard them)
388 if (!AvailableAlus[AluDiscarded].empty()) {
389 OccupedSlotsMask = 15;
390 return PopInst(AvailableAlus[AluDiscarded]);
391 }
392 // If there is a T_XYZW alu available, use it
393 if (!AvailableAlus[AluT_XYZW].empty()) {
394 OccupedSlotsMask = 15;
395 return PopInst(AvailableAlus[AluT_XYZW]);
396 }
397 }
398 for (unsigned Chan = 0; Chan < 4; ++Chan) {
399 bool isOccupied = OccupedSlotsMask & (1 << Chan);
400 if (!isOccupied) {
401 SUnit *SU = AttemptFillSlot(Chan);
402 if (SU) {
403 OccupedSlotsMask |= (1 << Chan);
Vincent Lejeune3ab0ba32013-03-14 15:50:45 +0000404 InstructionsGroupCandidate.push_back(SU->getInstr());
Vincent Lejeune62f38ca2013-03-05 18:41:32 +0000405 return SU;
406 }
407 }
408 }
409 PrepareNextSlot();
410 }
411 return NULL;
412}
413
414SUnit* R600SchedStrategy::pickOther(int QID) {
415 SUnit *SU = 0;
416 ReadyQueue *AQ = Available[QID];
417
418 if (AQ->empty()) {
419 MoveUnits(Pending[QID], AQ);
420 }
421 if (!AQ->empty()) {
422 SU = *AQ->begin();
423 AQ->remove(AQ->begin());
424 }
425 return SU;
426}
427