//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimized for the VLIW4 architecture; extend it to
// support the TRANS slot.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/Support/raw_ostream.h"
#include <set>

using namespace llvm;

void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  TII = static_cast<const R600InstrInfo*>(DAG->TII);
  TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
  MRI = &DAG->MRI;
  Available[IDAlu]->clear();
  Available[IDFetch]->clear();
  Available[IDOther]->clear();
  CurInstKind = IDOther;
  CurEmitted = 0;
  OccupedSlotsMask = 15;
  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
  InstKindLimit[IDAlu] = 120; // 128 minus 8 as a safety margin

  const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
    InstKindLimit[IDFetch] = 7; // 8 minus 1 as a safety margin
  } else {
    InstKindLimit[IDFetch] = 15; // 16 minus 1 as a safety margin
  }
}

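// Move every SUnit from QSrc to QDst, clearing QSrc's queue-id bit on each
// node so it is no longer considered a member of the source queue.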
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) {
  if (QSrc->empty())
    return;
  for (ReadyQueue::iterator I = QSrc->begin(),
       E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    QDst->push(*I);
  }
  QSrc->clear();
}

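// Pick the next node to schedule. ALU instructions are preferred while the
// current ALU clause has room; once CurEmitted exceeds the per-kind limit and
// fetch or other work is pending, the strategy switches clause type.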
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
  SUnit *SU = NULL;
  IsTopNode = true;
  NextInstKind = IDOther;

  // Check whether we want to switch the current clause type.
  bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
      (CurEmitted > InstKindLimit[CurInstKind]) ||
      (Available[CurInstKind]->empty());
  bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
      (!Available[IDFetch]->empty() || !Available[IDOther]->empty());

  if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
      (!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
    // Try to pick an ALU instruction.
    SU = pickAlu();
    if (SU) {
      if (CurEmitted > InstKindLimit[IDAlu])
        CurEmitted = 0;
      NextInstKind = IDAlu;
    }
  }

  if (!SU) {
    // Try to pick a FETCH instruction.
    SU = pickOther(IDFetch);
    if (SU)
      NextInstKind = IDFetch;
  }

  // Try to pick some other instruction.
  if (!SU) {
    SU = pickOther(IDOther);
    if (SU)
      NextInstKind = IDOther;
  }

  DEBUG(
      if (SU) {
        dbgs() << "picked node: ";
        SU->dump(DAG);
      } else {
        dbgs() << "NO NODE ";
        for (int i = 0; i < IDLast; ++i) {
          Available[i]->dump();
          Pending[i]->dump();
        }
        for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
          const SUnit &S = DAG->SUnits[i];
          if (!S.isScheduled)
            S.dump(DAG);
        }
      }
  );

  return SU;
}

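// Account for the instruction that was just scheduled: a whole-group XYZW ALU
// op occupies four slots, each ALU literal operand consumes one extra slot,
// and discarded copies cost nothing.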
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  DEBUG(dbgs() << "scheduled: ");
  DEBUG(SU->dump(DAG));

  if (NextInstKind != CurInstKind) {
    DEBUG(dbgs() << "Instruction Type Switch\n");
    if (NextInstKind != IDAlu)
      OccupedSlotsMask = 15;
    CurEmitted = 0;
    CurInstKind = NextInstKind;
  }

  if (CurInstKind == IDAlu) {
    switch (getAluKind(SU)) {
    case AluT_XYZW:
      CurEmitted += 4;
      break;
    case AluDiscarded:
      break;
    default: {
      ++CurEmitted;
      for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
           E = SU->getInstr()->operands_end(); It != E; ++It) {
        MachineOperand &MO = *It;
        if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
          ++CurEmitted;
      }
    }
    }
  } else {
    ++CurEmitted;
  }

  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");

  if (CurInstKind != IDFetch) {
    MoveUnits(Pending[IDFetch], Available[IDFetch]);
  }
  MoveUnits(Pending[IDOther], Available[IDOther]);
}

void R600SchedStrategy::releaseTopNode(SUnit *SU) {
  int IK = getInstKind(SU);

  DEBUG(dbgs() << IK << " <= ");
  DEBUG(SU->dump(DAG));

  Pending[IK]->push(SU);
}

void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
}

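// Check whether Reg belongs to RC, handling both physical and virtual
// registers.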
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
                                          const TargetRegisterClass *RC) const {
  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
    return RC->contains(Reg);
  } else {
    return MRI->getRegClass(Reg) == RC;
  }
}

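// Classify an ALU instruction by the slot(s) it may occupy: a single channel
// (AluT_X..AluT_W), a whole instruction group (AluT_XYZW), any free slot
// (AluAny), or none at all (AluDiscarded) for copies that will be folded away.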
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
  MachineInstr *MI = SU->getInstr();

  switch (MI->getOpcode()) {
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
    return AluT_XYZW;
  case AMDGPU::COPY:
    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
      // %vregX = COPY Tn_X is likely to be discarded in favor of an
      // assignment of Tn_X to %vregX; don't consider it in scheduling.
      return AluDiscarded;
    } else if (MI->getOperand(1).isUndef()) {
      // MI will become a KILL; don't consider it in scheduling.
      return AluDiscarded;
    }
  default:
    break;
  }

  // Does the instruction take a whole instruction group?
  if (TII->isVector(*MI) ||
      TII->isCubeOp(MI->getOpcode()) ||
      TII->isReductionOp(MI->getOpcode()))
    return AluT_XYZW;

  // Is the result already assigned to a channel?
  unsigned DestSubReg = MI->getOperand(0).getSubReg();
  switch (DestSubReg) {
  case AMDGPU::sub0:
    return AluT_X;
  case AMDGPU::sub1:
    return AluT_Y;
  case AMDGPU::sub2:
    return AluT_Z;
  case AMDGPU::sub3:
    return AluT_W;
  default:
    break;
  }

  // Is the result already a member of an X/Y/Z/W class?
  unsigned DestReg = MI->getOperand(0).getReg();
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
      regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
    return AluT_X;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
    return AluT_Y;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
    return AluT_Z;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
    return AluT_W;
  if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
    return AluT_XYZW;

  return AluAny;
}

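// Classify an instruction into one of the three clause kinds the strategy
// tracks: ALU, FETCH (texture/vertex), or other.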
int R600SchedStrategy::getInstKind(SUnit* SU) {
  int Opcode = SU->getInstr()->getOpcode();

  if (TII->isALUInstr(Opcode)) {
    return IDAlu;
  }

  switch (Opcode) {
  case AMDGPU::COPY:
  case AMDGPU::CONST_COPY:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::DOT4_eg_pseudo:
  case AMDGPU::DOT4_r600_pseudo:
    return IDAlu;
  case AMDGPU::TEX_VTX_CONSTBUF:
  case AMDGPU::TEX_VTX_TEXBUF:
  case AMDGPU::TEX_LD:
  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
  case AMDGPU::TEX_GET_GRADIENTS_H:
  case AMDGPU::TEX_GET_GRADIENTS_V:
  case AMDGPU::TEX_SET_GRADIENTS_H:
  case AMDGPU::TEX_SET_GRADIENTS_V:
  case AMDGPU::TEX_SAMPLE:
  case AMDGPU::TEX_SAMPLE_C:
  case AMDGPU::TEX_SAMPLE_L:
  case AMDGPU::TEX_SAMPLE_C_L:
  case AMDGPU::TEX_SAMPLE_LB:
  case AMDGPU::TEX_SAMPLE_C_LB:
  case AMDGPU::TEX_SAMPLE_G:
  case AMDGPU::TEX_SAMPLE_C_G:
  case AMDGPU::TXD:
  case AMDGPU::TXD_SHADOW:
    return IDFetch;
  default:
    DEBUG(
        dbgs() << "other inst: ";
        SU->dump(DAG);
    );
    return IDOther;
  }
}

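// Record which constant pair index an instruction's constant reads select on
// each read port: select channels 0 and 1 map to the XY port, channels 2 and
// 3 to the ZW port. Only the first pair seen on each port is kept. Two
// records are compatible when neither port would need two different pairs.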
class ConstPairs {
private:
  unsigned XYPair;
  unsigned ZWPair;
public:
  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
    for (unsigned i = 0; i < 3; i++) {
      unsigned ReadConstChan = ReadConst[i] & 3;
      unsigned ReadConstIndex = ReadConst[i] & (~3);
      if (ReadConstChan < 2) {
        if (!XYPair) {
          XYPair = ReadConstIndex;
        }
      } else {
        if (!ZWPair) {
          ZWPair = ReadConstIndex;
        }
      }
    }
  }

  bool isCompatibleWith(const ConstPairs& CP) const {
    return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
        (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
  }
};

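// Gather the constant reads of MI's up to three source operands into a
// ConstPairs record; non-ALU instructions read no constants.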
static
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
  unsigned ReadConsts[3] = {0, 0, 0};
  R600Operands::Ops OpTable[3][2] = {
    {R600Operands::SRC0, R600Operands::SRC0_SEL},
    {R600Operands::SRC1, R600Operands::SRC1_SEL},
    {R600Operands::SRC2, R600Operands::SRC2_SEL},
  };

  if (!TII->isALUInstr(MI.getOpcode()))
    return ConstPairs(ReadConsts);

  for (unsigned i = 0; i < 3; i++) {
    int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
    if (SrcIdx < 0)
      break;
    if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
      ReadConsts[i] = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
  }
  return ConstPairs(ReadConsts);
}

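// An instruction may join the group currently being formed only if its
// constant reads are compatible with those of every instruction already
// placed in InstructionsGroupCandidate.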
bool R600SchedStrategy::isBundleable(const MachineInstr& MI) {
  const ConstPairs &MIPair = getPairs(TII, MI);
  for (unsigned i = 0; i < 4; i++) {
    if (!InstructionsGroupCandidate[i])
      continue;
    const ConstPairs &IGPair = getPairs(TII,
        *InstructionsGroupCandidate[i]->getInstr());
    if (!IGPair.isCompatibleWith(MIPair))
      return false;
  }
  return true;
}

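// Pop the best unit (in CompareSUnit order) from Q that can still be bundled
// with the instruction group under construction, or NULL if none qualifies.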
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
  if (Q.empty())
    return NULL;
  for (std::multiset<SUnit *, CompareSUnit>::iterator It = Q.begin(),
       E = Q.end(); It != E; ++It) {
    SUnit *SU = *It;
    if (isBundleable(*SU->getInstr())) {
      Q.erase(It);
      return SU;
    }
  }
  return NULL;
}

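// Drain the pending ALU queue, classifying each unit by AluKind and adding it
// to the matching AvailableAlus set.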
void R600SchedStrategy::LoadAlu() {
  ReadyQueue *QSrc = Pending[IDAlu];
  for (ReadyQueue::iterator I = QSrc->begin(),
       E = QSrc->end(); I != E; ++I) {
    (*I)->NodeQueueId &= ~QSrc->getID();
    AluKind AK = getAluKind(*I);
    AvailableAlus[AK].insert(*I);
  }
  QSrc->clear();
}

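// Begin a fresh instruction group: reset the slot mask and the candidate
// array, then reload the ALU worklists.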
void R600SchedStrategy::PrepareNextSlot() {
  DEBUG(dbgs() << "New Slot\n");
  assert(OccupedSlotsMask && "Slot wasn't filled");
  OccupedSlotsMask = 0;
  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
  LoadAlu();
}

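// Constrain the register class of MI's destination so that the instruction is
// forced into the requested channel (X, Y, Z, or W).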
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
  unsigned DestReg = MI->getOperand(0).getReg();
  // PressureRegister crashes if an operand is both defined and used in the
  // same instruction and we try to constrain its register class.
  for (MachineInstr::mop_iterator It = MI->operands_begin(),
       E = MI->operands_end(); It != E; ++It) {
    MachineOperand &MO = *It;
    if (MO.isReg() && !MO.isDef() && MO.getReg() == DestReg)
      return;
  }
  // Constrain the register class of DestReg to assign it to Slot.
  switch (Slot) {
  case 0:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
    break;
  case 1:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
    break;
  case 2:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
    break;
  case 3:
    MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
    break;
  }
}

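// Try to fill the given channel, choosing between the best unit already tied
// to that channel and the best channel-agnostic (AluAny) unit; the loser is
// put back into its set. A channel-agnostic pick gets its destination class
// constrained to the slot.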
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
  static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
  SUnit *SlottedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
  SUnit *UnslottedSU = PopInst(AvailableAlus[AluAny]);
  if (!UnslottedSU) {
    return SlottedSU;
  } else if (!SlottedSU) {
    AssignSlot(UnslottedSU->getInstr(), Slot);
    return UnslottedSU;
  } else {
    // Determine which one to pick (the lesser one).
    if (CompareSUnit()(SlottedSU, UnslottedSU)) {
      AvailableAlus[AluAny].insert(UnslottedSU);
      return SlottedSU;
    } else {
      AvailableAlus[IndexToID[Slot]].insert(SlottedSU);
      AssignSlot(UnslottedSU->getInstr(), Slot);
      return UnslottedSU;
    }
  }
}

bool R600SchedStrategy::isAvailablesAluEmpty() const {
  return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
      AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
      AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
      AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
}

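// Pick the next ALU instruction, building instruction groups as it goes: at a
// group boundary, discarded copies and whole-group (XYZW) ops are emitted
// first; otherwise the free channels are filled in X..W order.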
SUnit* R600SchedStrategy::pickAlu() {
  while (!isAvailablesAluEmpty()) {
    if (!OccupedSlotsMask) {
      // Flush physical reg copies (RA will discard them).
      if (!AvailableAlus[AluDiscarded].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluDiscarded]);
      }
      // If there is a T_XYZW alu available, use it.
      if (!AvailableAlus[AluT_XYZW].empty()) {
        OccupedSlotsMask = 15;
        return PopInst(AvailableAlus[AluT_XYZW]);
      }
    }
    for (unsigned Chan = 0; Chan < 4; ++Chan) {
      bool isOccupied = OccupedSlotsMask & (1 << Chan);
      if (!isOccupied) {
        SUnit *SU = AttemptFillSlot(Chan);
        if (SU) {
          OccupedSlotsMask |= (1 << Chan);
          InstructionsGroupCandidate[Chan] = SU;
          return SU;
        }
      }
    }
    PrepareNextSlot();
  }
  return NULL;
}

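// Pick any unit from the given queue, refilling the available queue from the
// pending one when it runs dry.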
SUnit* R600SchedStrategy::pickOther(int QID) {
  SUnit *SU = NULL;
  ReadyQueue *AQ = Available[QID];

  if (AQ->empty()) {
    MoveUnits(Pending[QID], AQ);
  }
  if (!AQ->empty()) {
    SU = *AQ->begin();
    AQ->remove(AQ->begin());
  }
  return SU;
}