Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 1 | //===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | /// \file |
| 11 | /// \brief R600 Machine Scheduler interface |
| 12 | // TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot |
| 13 | // |
| 14 | //===----------------------------------------------------------------------===// |
| 15 | |
| 16 | #define DEBUG_TYPE "misched" |
| 17 | |
| 18 | #include "R600MachineScheduler.h" |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 19 | #include "llvm/CodeGen/LiveIntervalAnalysis.h" |
Benjamin Kramer | 5c35290 | 2013-05-23 17:10:37 +0000 | [diff] [blame] | 20 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 21 | #include "llvm/Pass.h" |
| 22 | #include "llvm/PassManager.h" |
NAKAMURA Takumi | 3f179b5 | 2013-03-11 08:19:28 +0000 | [diff] [blame] | 23 | #include "llvm/Support/raw_ostream.h" |
NAKAMURA Takumi | 3f179b5 | 2013-03-11 08:19:28 +0000 | [diff] [blame] | 24 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 25 | using namespace llvm; |
| 26 | |
| 27 | void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { |
| 28 | |
| 29 | DAG = dag; |
| 30 | TII = static_cast<const R600InstrInfo*>(DAG->TII); |
| 31 | TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); |
| 32 | MRI = &DAG->MRI; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 33 | CurInstKind = IDOther; |
| 34 | CurEmitted = 0; |
| 35 | OccupedSlotsMask = 15; |
Vincent Lejeune | dae2a20 | 2013-04-03 16:49:34 +0000 | [diff] [blame] | 36 | InstKindLimit[IDAlu] = TII->getMaxAlusPerClause(); |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 37 | InstKindLimit[IDOther] = 32; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 38 | |
| 39 | const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>(); |
Vincent Lejeune | dcfcf1d | 2013-05-17 16:49:55 +0000 | [diff] [blame] | 40 | InstKindLimit[IDFetch] = ST.getTexVTXClauseSize(); |
Vincent Lejeune | 843c6c2 | 2013-06-07 23:30:34 +0000 | [diff] [blame] | 41 | AluInstCount = 0; |
| 42 | FetchInstCount = 0; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 43 | } |
| 44 | |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 45 | void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc, |
| 46 | std::vector<SUnit *> &QDst) |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 47 | { |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 48 | QDst.insert(QDst.end(), QSrc.begin(), QSrc.end()); |
| 49 | QSrc.clear(); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 50 | } |
| 51 | |
/// Returns 248 / \p GPRCount (integer division).
/// NOTE(review): 248 is presumably the number of GPRs shared between
/// wavefronts - confirm against the hardware documentation.
/// \p GPRCount must be non-zero.
static unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
  assert(GPRCount != 0 && "GPRCount cannot be 0");
  const unsigned GPRBudget = 248;
  return GPRBudget / GPRCount;
}
| 57 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 58 | SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { |
| 59 | SUnit *SU = 0; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 60 | NextInstKind = IDOther; |
| 61 | |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 62 | IsTopNode = false; |
| 63 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 64 | // check if we might want to switch current clause type |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 65 | bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) || |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 66 | (Available[CurInstKind].empty()); |
Vincent Lejeune | dcfcf1d | 2013-05-17 16:49:55 +0000 | [diff] [blame] | 67 | bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) && |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 68 | (!Available[IDFetch].empty() || !Available[IDOther].empty()); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 69 | |
Vincent Lejeune | 843c6c2 | 2013-06-07 23:30:34 +0000 | [diff] [blame] | 70 | if (CurInstKind == IDAlu && !Available[IDFetch].empty()) { |
| 71 | // We use the heuristic provided by AMD Accelerated Parallel Processing |
| 72 | // OpenCL Programming Guide : |
| 73 | // The approx. number of WF that allows TEX inst to hide ALU inst is : |
| 74 | // 500 (cycles for TEX) / (AluFetchRatio * 8 (cycles for ALU)) |
| 75 | float ALUFetchRationEstimate = |
| 76 | (AluInstCount + AvailablesAluCount() + Pending[IDAlu].size()) / |
| 77 | (FetchInstCount + Available[IDFetch].size()); |
| 78 | unsigned NeededWF = 62.5f / ALUFetchRationEstimate; |
| 79 | DEBUG( dbgs() << NeededWF << " approx. Wavefronts Required\n" ); |
| 80 | // We assume the local GPR requirements to be "dominated" by the requirement |
| 81 | // of the TEX clause (which consumes 128 bits regs) ; ALU inst before and |
| 82 | // after TEX are indeed likely to consume or generate values from/for the |
| 83 | // TEX clause. |
| 84 | // Available[IDFetch].size() * 2 : GPRs required in the Fetch clause |
| 85 | // We assume that fetch instructions are either TnXYZW = TEX TnXYZW (need |
| 86 | // one GPR) or TmXYZW = TnXYZW (need 2 GPR). |
| 87 | // (TODO : use RegisterPressure) |
| 88 | // If we are going too use too many GPR, we flush Fetch instruction to lower |
| 89 | // register pressure on 128 bits regs. |
| 90 | unsigned NearRegisterRequirement = 2 * Available[IDFetch].size(); |
| 91 | if (NeededWF > getWFCountLimitedByGPR(NearRegisterRequirement)) |
| 92 | AllowSwitchFromAlu = true; |
| 93 | } |
| 94 | |
| 95 | |
Tom Stellard | ad7ecc6 | 2013-06-05 03:43:06 +0000 | [diff] [blame] | 96 | // We want to scheduled AR defs as soon as possible to make sure they aren't |
| 97 | // put in a different ALU clause from their uses. |
| 98 | if (!SU && !UnscheduledARDefs.empty()) { |
| 99 | SU = UnscheduledARDefs[0]; |
| 100 | UnscheduledARDefs.erase(UnscheduledARDefs.begin()); |
| 101 | NextInstKind = IDAlu; |
| 102 | } |
| 103 | |
| 104 | if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) || |
| 105 | (!AllowSwitchFromAlu && CurInstKind == IDAlu))) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 106 | // try to pick ALU |
| 107 | SU = pickAlu(); |
Vincent Lejeune | 5121197 | 2013-06-05 20:27:35 +0000 | [diff] [blame] | 108 | if (!SU && !PhysicalRegCopy.empty()) { |
| 109 | SU = PhysicalRegCopy.front(); |
| 110 | PhysicalRegCopy.erase(PhysicalRegCopy.begin()); |
| 111 | } |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 112 | if (SU) { |
Vincent Lejeune | dcfcf1d | 2013-05-17 16:49:55 +0000 | [diff] [blame] | 113 | if (CurEmitted >= InstKindLimit[IDAlu]) |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 114 | CurEmitted = 0; |
| 115 | NextInstKind = IDAlu; |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | if (!SU) { |
| 120 | // try to pick FETCH |
| 121 | SU = pickOther(IDFetch); |
| 122 | if (SU) |
| 123 | NextInstKind = IDFetch; |
| 124 | } |
| 125 | |
| 126 | // try to pick other |
| 127 | if (!SU) { |
| 128 | SU = pickOther(IDOther); |
| 129 | if (SU) |
| 130 | NextInstKind = IDOther; |
| 131 | } |
| 132 | |
Tom Stellard | ad7ecc6 | 2013-06-05 03:43:06 +0000 | [diff] [blame] | 133 | // We want to schedule the AR uses as late as possible to make sure that |
| 134 | // the AR defs have been released. |
| 135 | if (!SU && !UnscheduledARUses.empty()) { |
| 136 | SU = UnscheduledARUses[0]; |
| 137 | UnscheduledARUses.erase(UnscheduledARUses.begin()); |
| 138 | NextInstKind = IDAlu; |
| 139 | } |
| 140 | |
| 141 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 142 | DEBUG( |
| 143 | if (SU) { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 144 | dbgs() << " ** Pick node **\n"; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 145 | SU->dump(DAG); |
| 146 | } else { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 147 | dbgs() << "NO NODE \n"; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 148 | for (unsigned i = 0; i < DAG->SUnits.size(); i++) { |
| 149 | const SUnit &S = DAG->SUnits[i]; |
| 150 | if (!S.isScheduled) |
| 151 | S.dump(DAG); |
| 152 | } |
| 153 | } |
| 154 | ); |
| 155 | |
| 156 | return SU; |
| 157 | } |
| 158 | |
| 159 | void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 160 | if (NextInstKind != CurInstKind) { |
| 161 | DEBUG(dbgs() << "Instruction Type Switch\n"); |
| 162 | if (NextInstKind != IDAlu) |
| 163 | OccupedSlotsMask = 15; |
| 164 | CurEmitted = 0; |
| 165 | CurInstKind = NextInstKind; |
| 166 | } |
| 167 | |
| 168 | if (CurInstKind == IDAlu) { |
Vincent Lejeune | 843c6c2 | 2013-06-07 23:30:34 +0000 | [diff] [blame] | 169 | AluInstCount ++; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 170 | switch (getAluKind(SU)) { |
| 171 | case AluT_XYZW: |
| 172 | CurEmitted += 4; |
| 173 | break; |
| 174 | case AluDiscarded: |
| 175 | break; |
| 176 | default: { |
| 177 | ++CurEmitted; |
| 178 | for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), |
| 179 | E = SU->getInstr()->operands_end(); It != E; ++It) { |
| 180 | MachineOperand &MO = *It; |
| 181 | if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) |
| 182 | ++CurEmitted; |
| 183 | } |
| 184 | } |
| 185 | } |
| 186 | } else { |
| 187 | ++CurEmitted; |
| 188 | } |
| 189 | |
| 190 | |
| 191 | DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n"); |
| 192 | |
| 193 | if (CurInstKind != IDFetch) { |
| 194 | MoveUnits(Pending[IDFetch], Available[IDFetch]); |
Vincent Lejeune | 843c6c2 | 2013-06-07 23:30:34 +0000 | [diff] [blame] | 195 | } else |
| 196 | FetchInstCount++; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 197 | } |
| 198 | |
Vincent Lejeune | 5121197 | 2013-06-05 20:27:35 +0000 | [diff] [blame] | 199 | static bool |
| 200 | isPhysicalRegCopy(MachineInstr *MI) { |
| 201 | if (MI->getOpcode() != AMDGPU::COPY) |
| 202 | return false; |
| 203 | |
| 204 | return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()); |
| 205 | } |
| 206 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 207 | void R600SchedStrategy::releaseTopNode(SUnit *SU) { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 208 | DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG);); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 209 | } |
| 210 | |
| 211 | void R600SchedStrategy::releaseBottomNode(SUnit *SU) { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 212 | DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG);); |
Vincent Lejeune | 5121197 | 2013-06-05 20:27:35 +0000 | [diff] [blame] | 213 | if (isPhysicalRegCopy(SU->getInstr())) { |
| 214 | PhysicalRegCopy.push_back(SU); |
| 215 | return; |
| 216 | } |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 217 | |
| 218 | int IK = getInstKind(SU); |
Tom Stellard | ad7ecc6 | 2013-06-05 03:43:06 +0000 | [diff] [blame] | 219 | |
| 220 | // Check for AR register defines |
| 221 | for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(), |
| 222 | E = SU->getInstr()->operands_end(); |
| 223 | I != E; ++I) { |
| 224 | if (I->isReg() && I->getReg() == AMDGPU::AR_X) { |
| 225 | if (I->isDef()) { |
| 226 | UnscheduledARDefs.push_back(SU); |
| 227 | } else { |
| 228 | UnscheduledARUses.push_back(SU); |
| 229 | } |
| 230 | return; |
| 231 | } |
| 232 | } |
| 233 | |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 234 | // There is no export clause, we can schedule one as soon as its ready |
| 235 | if (IK == IDOther) |
| 236 | Available[IDOther].push_back(SU); |
| 237 | else |
| 238 | Pending[IK].push_back(SU); |
| 239 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 240 | } |
| 241 | |
| 242 | bool R600SchedStrategy::regBelongsToClass(unsigned Reg, |
| 243 | const TargetRegisterClass *RC) const { |
| 244 | if (!TargetRegisterInfo::isVirtualRegister(Reg)) { |
| 245 | return RC->contains(Reg); |
| 246 | } else { |
| 247 | return MRI->getRegClass(Reg) == RC; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { |
| 252 | MachineInstr *MI = SU->getInstr(); |
| 253 | |
| 254 | switch (MI->getOpcode()) { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 255 | case AMDGPU::PRED_X: |
| 256 | return AluPredX; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 257 | case AMDGPU::INTERP_PAIR_XY: |
| 258 | case AMDGPU::INTERP_PAIR_ZW: |
| 259 | case AMDGPU::INTERP_VEC_LOAD: |
Vincent Lejeune | 4ed9917 | 2013-05-17 16:50:32 +0000 | [diff] [blame] | 260 | case AMDGPU::DOT_4: |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 261 | return AluT_XYZW; |
| 262 | case AMDGPU::COPY: |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 263 | if (MI->getOperand(1).isUndef()) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 264 | // MI will become a KILL, don't considers it in scheduling |
| 265 | return AluDiscarded; |
| 266 | } |
| 267 | default: |
| 268 | break; |
| 269 | } |
| 270 | |
| 271 | // Does the instruction take a whole IG ? |
| 272 | if(TII->isVector(*MI) || |
| 273 | TII->isCubeOp(MI->getOpcode()) || |
| 274 | TII->isReductionOp(MI->getOpcode())) |
| 275 | return AluT_XYZW; |
| 276 | |
| 277 | // Is the result already assigned to a channel ? |
| 278 | unsigned DestSubReg = MI->getOperand(0).getSubReg(); |
| 279 | switch (DestSubReg) { |
| 280 | case AMDGPU::sub0: |
| 281 | return AluT_X; |
| 282 | case AMDGPU::sub1: |
| 283 | return AluT_Y; |
| 284 | case AMDGPU::sub2: |
| 285 | return AluT_Z; |
| 286 | case AMDGPU::sub3: |
| 287 | return AluT_W; |
| 288 | default: |
| 289 | break; |
| 290 | } |
| 291 | |
| 292 | // Is the result already member of a X/Y/Z/W class ? |
| 293 | unsigned DestReg = MI->getOperand(0).getReg(); |
| 294 | if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || |
| 295 | regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) |
| 296 | return AluT_X; |
| 297 | if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) |
| 298 | return AluT_Y; |
| 299 | if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) |
| 300 | return AluT_Z; |
| 301 | if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) |
| 302 | return AluT_W; |
| 303 | if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) |
| 304 | return AluT_XYZW; |
| 305 | |
| 306 | return AluAny; |
| 307 | |
| 308 | } |
| 309 | |
| 310 | int R600SchedStrategy::getInstKind(SUnit* SU) { |
| 311 | int Opcode = SU->getInstr()->getOpcode(); |
| 312 | |
Vincent Lejeune | f63f85a | 2013-05-17 16:50:37 +0000 | [diff] [blame] | 313 | if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode)) |
| 314 | return IDFetch; |
| 315 | |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 316 | if (TII->isALUInstr(Opcode)) { |
| 317 | return IDAlu; |
| 318 | } |
| 319 | |
| 320 | switch (Opcode) { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 321 | case AMDGPU::PRED_X: |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 322 | case AMDGPU::COPY: |
| 323 | case AMDGPU::CONST_COPY: |
| 324 | case AMDGPU::INTERP_PAIR_XY: |
| 325 | case AMDGPU::INTERP_PAIR_ZW: |
| 326 | case AMDGPU::INTERP_VEC_LOAD: |
Vincent Lejeune | 4ed9917 | 2013-05-17 16:50:32 +0000 | [diff] [blame] | 327 | case AMDGPU::DOT_4: |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 328 | return IDAlu; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 329 | default: |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 330 | return IDOther; |
| 331 | } |
| 332 | } |
| 333 | |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 334 | SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 335 | if (Q.empty()) |
| 336 | return NULL; |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 337 | for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend(); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 338 | It != E; ++It) { |
| 339 | SUnit *SU = *It; |
Vincent Lejeune | 3ab0ba3 | 2013-03-14 15:50:45 +0000 | [diff] [blame] | 340 | InstructionsGroupCandidate.push_back(SU->getInstr()); |
| 341 | if (TII->canBundle(InstructionsGroupCandidate)) { |
| 342 | InstructionsGroupCandidate.pop_back(); |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 343 | Q.erase((It + 1).base()); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 344 | return SU; |
Vincent Lejeune | 3ab0ba3 | 2013-03-14 15:50:45 +0000 | [diff] [blame] | 345 | } else { |
| 346 | InstructionsGroupCandidate.pop_back(); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 347 | } |
| 348 | } |
| 349 | return NULL; |
| 350 | } |
| 351 | |
| 352 | void R600SchedStrategy::LoadAlu() { |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 353 | std::vector<SUnit *> &QSrc = Pending[IDAlu]; |
| 354 | for (unsigned i = 0, e = QSrc.size(); i < e; ++i) { |
| 355 | AluKind AK = getAluKind(QSrc[i]); |
| 356 | AvailableAlus[AK].push_back(QSrc[i]); |
| 357 | } |
| 358 | QSrc.clear(); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 359 | } |
| 360 | |
| 361 | void R600SchedStrategy::PrepareNextSlot() { |
| 362 | DEBUG(dbgs() << "New Slot\n"); |
| 363 | assert (OccupedSlotsMask && "Slot wasn't filled"); |
| 364 | OccupedSlotsMask = 0; |
Vincent Lejeune | 3ab0ba3 | 2013-03-14 15:50:45 +0000 | [diff] [blame] | 365 | InstructionsGroupCandidate.clear(); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 366 | LoadAlu(); |
| 367 | } |
| 368 | |
| 369 | void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { |
| 370 | unsigned DestReg = MI->getOperand(0).getReg(); |
| 371 | // PressureRegister crashes if an operand is def and used in the same inst |
| 372 | // and we try to constraint its regclass |
| 373 | for (MachineInstr::mop_iterator It = MI->operands_begin(), |
| 374 | E = MI->operands_end(); It != E; ++It) { |
| 375 | MachineOperand &MO = *It; |
| 376 | if (MO.isReg() && !MO.isDef() && |
| 377 | MO.getReg() == MI->getOperand(0).getReg()) |
| 378 | return; |
| 379 | } |
| 380 | // Constrains the regclass of DestReg to assign it to Slot |
| 381 | switch (Slot) { |
| 382 | case 0: |
| 383 | MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); |
| 384 | break; |
| 385 | case 1: |
| 386 | MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); |
| 387 | break; |
| 388 | case 2: |
| 389 | MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); |
| 390 | break; |
| 391 | case 3: |
| 392 | MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); |
| 393 | break; |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { |
| 398 | static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; |
| 399 | SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 400 | if (SlotedSU) |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 401 | return SlotedSU; |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 402 | SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); |
| 403 | if (UnslotedSU) |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 404 | AssignSlot(UnslotedSU->getInstr(), Slot); |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 405 | return UnslotedSU; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 406 | } |
| 407 | |
Vincent Lejeune | 843c6c2 | 2013-06-07 23:30:34 +0000 | [diff] [blame] | 408 | unsigned R600SchedStrategy::AvailablesAluCount() const { |
| 409 | return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() + |
| 410 | AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() + |
| 411 | AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() + |
| 412 | AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size(); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 413 | } |
| 414 | |
| 415 | SUnit* R600SchedStrategy::pickAlu() { |
Vincent Lejeune | 843c6c2 | 2013-06-07 23:30:34 +0000 | [diff] [blame] | 416 | while (AvailablesAluCount() || !Pending[IDAlu].empty()) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 417 | if (!OccupedSlotsMask) { |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 418 | // Bottom up scheduling : predX must comes first |
| 419 | if (!AvailableAlus[AluPredX].empty()) { |
| 420 | OccupedSlotsMask = 15; |
| 421 | return PopInst(AvailableAlus[AluPredX]); |
| 422 | } |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 423 | // Flush physical reg copies (RA will discard them) |
| 424 | if (!AvailableAlus[AluDiscarded].empty()) { |
| 425 | OccupedSlotsMask = 15; |
| 426 | return PopInst(AvailableAlus[AluDiscarded]); |
| 427 | } |
| 428 | // If there is a T_XYZW alu available, use it |
| 429 | if (!AvailableAlus[AluT_XYZW].empty()) { |
| 430 | OccupedSlotsMask = 15; |
| 431 | return PopInst(AvailableAlus[AluT_XYZW]); |
| 432 | } |
| 433 | } |
Vincent Lejeune | 76fc2d0 | 2013-05-17 16:50:56 +0000 | [diff] [blame] | 434 | for (int Chan = 3; Chan > -1; --Chan) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 435 | bool isOccupied = OccupedSlotsMask & (1 << Chan); |
| 436 | if (!isOccupied) { |
| 437 | SUnit *SU = AttemptFillSlot(Chan); |
| 438 | if (SU) { |
| 439 | OccupedSlotsMask |= (1 << Chan); |
Vincent Lejeune | 3ab0ba3 | 2013-03-14 15:50:45 +0000 | [diff] [blame] | 440 | InstructionsGroupCandidate.push_back(SU->getInstr()); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 441 | return SU; |
| 442 | } |
| 443 | } |
| 444 | } |
| 445 | PrepareNextSlot(); |
| 446 | } |
| 447 | return NULL; |
| 448 | } |
| 449 | |
| 450 | SUnit* R600SchedStrategy::pickOther(int QID) { |
| 451 | SUnit *SU = 0; |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 452 | std::vector<SUnit *> &AQ = Available[QID]; |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 453 | |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 454 | if (AQ.empty()) { |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 455 | MoveUnits(Pending[QID], AQ); |
| 456 | } |
Vincent Lejeune | 21ca0b3 | 2013-05-17 16:50:44 +0000 | [diff] [blame] | 457 | if (!AQ.empty()) { |
| 458 | SU = AQ.back(); |
| 459 | AQ.resize(AQ.size() - 1); |
Vincent Lejeune | 62f38ca | 2013-03-05 18:41:32 +0000 | [diff] [blame] | 460 | } |
| 461 | return SU; |
| 462 | } |
| 463 | |