Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame^] | 1 | //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | /// \file |
| 11 | /// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold |
| 12 | /// up to 128 ALU instructions; these instructions can access up to 4 |
| 13 | /// prefetched lines of 16 registers from constant buffers. Such ALU clauses |
| 14 | /// are initiated by CF_ALU instructions. |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "AMDGPU.h" |
| 18 | #include "R600Defines.h" |
| 19 | #include "R600InstrInfo.h" |
| 20 | #include "R600MachineFunctionInfo.h" |
| 21 | #include "R600RegisterInfo.h" |
| 22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 25 | |
| 26 | namespace llvm { |
| 27 | |
| 28 | class R600EmitClauseMarkersPass : public MachineFunctionPass { |
| 29 | |
| 30 | private: |
| 31 | static char ID; |
| 32 | const R600InstrInfo *TII; |
| 33 | |
| 34 | unsigned OccupiedDwords(MachineInstr *MI) const { |
| 35 | switch (MI->getOpcode()) { |
| 36 | case AMDGPU::INTERP_PAIR_XY: |
| 37 | case AMDGPU::INTERP_PAIR_ZW: |
| 38 | case AMDGPU::INTERP_VEC_LOAD: |
| 39 | case AMDGPU::DOT4_eg_pseudo: |
| 40 | case AMDGPU::DOT4_r600_pseudo: |
| 41 | return 4; |
| 42 | case AMDGPU::KILL: |
| 43 | return 0; |
| 44 | default: |
| 45 | break; |
| 46 | } |
| 47 | |
| 48 | if(TII->isVector(*MI) || |
| 49 | TII->isCubeOp(MI->getOpcode()) || |
| 50 | TII->isReductionOp(MI->getOpcode())) |
| 51 | return 4; |
| 52 | |
| 53 | unsigned NumLiteral = 0; |
| 54 | for (MachineInstr::mop_iterator It = MI->operands_begin(), |
| 55 | E = MI->operands_end(); It != E; ++It) { |
| 56 | MachineOperand &MO = *It; |
| 57 | if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) |
| 58 | ++NumLiteral; |
| 59 | } |
| 60 | return 1 + NumLiteral; |
| 61 | } |
| 62 | |
| 63 | bool isALU(const MachineInstr *MI) const { |
| 64 | if (MI->getOpcode() == AMDGPU::KILLGT) |
| 65 | return false; |
| 66 | if (TII->isALUInstr(MI->getOpcode())) |
| 67 | return true; |
| 68 | if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode())) |
| 69 | return true; |
| 70 | switch (MI->getOpcode()) { |
| 71 | case AMDGPU::PRED_X: |
| 72 | case AMDGPU::INTERP_PAIR_XY: |
| 73 | case AMDGPU::INTERP_PAIR_ZW: |
| 74 | case AMDGPU::INTERP_VEC_LOAD: |
| 75 | case AMDGPU::COPY: |
| 76 | case AMDGPU::DOT4_eg_pseudo: |
| 77 | case AMDGPU::DOT4_r600_pseudo: |
| 78 | return true; |
| 79 | default: |
| 80 | return false; |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | bool IsTrivialInst(MachineInstr *MI) const { |
| 85 | switch (MI->getOpcode()) { |
| 86 | case AMDGPU::KILL: |
| 87 | case AMDGPU::RETURN: |
| 88 | return true; |
| 89 | default: |
| 90 | return false; |
| 91 | } |
| 92 | } |
| 93 | |
| 94 | // Register Idx, then Const value |
| 95 | std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI) |
| 96 | const { |
| 97 | const R600Operands::Ops OpTable[3][2] = { |
| 98 | {R600Operands::SRC0, R600Operands::SRC0_SEL}, |
| 99 | {R600Operands::SRC1, R600Operands::SRC1_SEL}, |
| 100 | {R600Operands::SRC2, R600Operands::SRC2_SEL}, |
| 101 | }; |
| 102 | std::vector<std::pair<unsigned, unsigned> > Result; |
| 103 | |
| 104 | if (!TII->isALUInstr(MI->getOpcode())) |
| 105 | return Result; |
| 106 | for (unsigned j = 0; j < 3; j++) { |
| 107 | int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]); |
| 108 | if (SrcIdx < 0) |
| 109 | break; |
| 110 | if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { |
| 111 | unsigned Const = MI->getOperand( |
| 112 | TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); |
| 113 | Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const)); |
| 114 | } |
| 115 | } |
| 116 | return Result; |
| 117 | } |
| 118 | |
| 119 | std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { |
| 120 | // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 |
| 121 | // (See also R600ISelLowering.cpp) |
| 122 | // ConstIndex value is in [0, 4095]; |
| 123 | return std::pair<unsigned, unsigned>( |
| 124 | ((Sel >> 2) - 512) >> 12, // KC_BANK |
| 125 | // Line Number of ConstIndex |
| 126 | // A line contains 16 constant registers however KCX bank can lock |
| 127 | // two line at the same time ; thus we want to get an even line number. |
| 128 | // Line number can be retrieved with (>>4), using (>>5) <<1 generates |
| 129 | // an even number. |
| 130 | ((((Sel >> 2) - 512) & 4095) >> 5) << 1); |
| 131 | } |
| 132 | |
| 133 | bool SubstituteKCacheBank(MachineInstr *MI, |
| 134 | std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const { |
| 135 | std::vector<std::pair<unsigned, unsigned> > UsedKCache; |
| 136 | std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI); |
| 137 | assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const"); |
| 138 | for (unsigned i = 0, n = Consts.size(); i < n; ++i) { |
| 139 | unsigned Sel = Consts[i].second; |
| 140 | unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; |
| 141 | unsigned KCacheIndex = Index * 4 + Chan; |
| 142 | const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); |
| 143 | if (CachedConsts.empty()) { |
| 144 | CachedConsts.push_back(BankLine); |
| 145 | UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); |
| 146 | continue; |
| 147 | } |
| 148 | if (CachedConsts[0] == BankLine) { |
| 149 | UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); |
| 150 | continue; |
| 151 | } |
| 152 | if (CachedConsts.size() == 1) { |
| 153 | CachedConsts.push_back(BankLine); |
| 154 | UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); |
| 155 | continue; |
| 156 | } |
| 157 | if (CachedConsts[1] == BankLine) { |
| 158 | UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); |
| 159 | continue; |
| 160 | } |
| 161 | return false; |
| 162 | } |
| 163 | |
| 164 | for (unsigned i = 0, n = Consts.size(); i < n; ++i) { |
| 165 | switch(UsedKCache[i].first) { |
| 166 | case 0: |
| 167 | MI->getOperand(Consts[i].first).setReg( |
| 168 | AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second)); |
| 169 | break; |
| 170 | case 1: |
| 171 | MI->getOperand(Consts[i].first).setReg( |
| 172 | AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second)); |
| 173 | break; |
| 174 | default: |
| 175 | llvm_unreachable("Wrong Cache Line"); |
| 176 | } |
| 177 | } |
| 178 | return true; |
| 179 | } |
| 180 | |
| 181 | MachineBasicBlock::iterator |
| 182 | MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { |
| 183 | MachineBasicBlock::iterator ClauseHead = I; |
| 184 | std::vector<std::pair<unsigned, unsigned> > KCacheBanks; |
| 185 | bool PushBeforeModifier = false; |
| 186 | unsigned AluInstCount = 0; |
| 187 | for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { |
| 188 | if (IsTrivialInst(I)) |
| 189 | continue; |
| 190 | if (!isALU(I)) |
| 191 | break; |
| 192 | if (I->getOpcode() == AMDGPU::PRED_X) { |
| 193 | if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) |
| 194 | PushBeforeModifier = true; |
| 195 | AluInstCount ++; |
| 196 | continue; |
| 197 | } |
| 198 | if (TII->isALUInstr(I->getOpcode()) && |
| 199 | !SubstituteKCacheBank(I, KCacheBanks)) |
| 200 | break; |
| 201 | AluInstCount += OccupiedDwords(I); |
| 202 | if (AluInstCount > 124) |
| 203 | break; |
| 204 | } |
| 205 | unsigned Opcode = PushBeforeModifier ? |
| 206 | AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; |
| 207 | BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) |
| 208 | .addImm(0) // ADDR |
| 209 | .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 |
| 210 | .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 |
| 211 | .addImm(KCacheBanks.empty()?0:2) // KM0 |
| 212 | .addImm((KCacheBanks.size() < 2)?0:2) // KM1 |
| 213 | .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 |
| 214 | .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 |
| 215 | .addImm(AluInstCount); // COUNT |
| 216 | return I; |
| 217 | } |
| 218 | |
| 219 | public: |
| 220 | R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), |
| 221 | TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } |
| 222 | |
| 223 | virtual bool runOnMachineFunction(MachineFunction &MF) { |
| 224 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); |
| 225 | BB != BB_E; ++BB) { |
| 226 | MachineBasicBlock &MBB = *BB; |
| 227 | MachineBasicBlock::iterator I = MBB.begin(); |
| 228 | if (I->getOpcode() == AMDGPU::CF_ALU) |
| 229 | continue; // BB was already parsed |
| 230 | for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { |
| 231 | if (isALU(I)) |
| 232 | I = MakeALUClause(MBB, I); |
| 233 | else |
| 234 | ++I; |
| 235 | } |
| 236 | } |
| 237 | return false; |
| 238 | } |
| 239 | |
| 240 | const char *getPassName() const { |
| 241 | return "R600 Emit Clause Markers Pass"; |
| 242 | } |
| 243 | }; |
| 244 | |
| 245 | char R600EmitClauseMarkersPass::ID = 0; |
| 246 | |
| 247 | } |
| 248 | |
| 249 | |
| 250 | llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) { |
| 251 | return new R600EmitClauseMarkersPass(TM); |
| 252 | } |
| 253 | |