Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 1 | //===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | /// \file |
| 11 | /// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold |
| 12 | /// 128 ALU instructions; these instructions can access up to 4 prefetched |
| 13 | /// lines of 16 registers each from constant buffers. Such ALU clauses are |
| 14 | /// initiated by CF_ALU instructions. |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #include "AMDGPU.h" |
| 18 | #include "R600Defines.h" |
| 19 | #include "R600InstrInfo.h" |
| 20 | #include "R600MachineFunctionInfo.h" |
| 21 | #include "R600RegisterInfo.h" |
| 22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 25 | |
Benjamin Kramer | d78bb46 | 2013-05-23 17:10:37 +0000 | [diff] [blame] | 26 | using namespace llvm; |
| 27 | |
| 28 | namespace { |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 29 | |
| 30 | class R600EmitClauseMarkersPass : public MachineFunctionPass { |
| 31 | |
| 32 | private: |
| 33 | static char ID; |
| 34 | const R600InstrInfo *TII; |
| 35 | |
| 36 | unsigned OccupiedDwords(MachineInstr *MI) const { |
| 37 | switch (MI->getOpcode()) { |
| 38 | case AMDGPU::INTERP_PAIR_XY: |
| 39 | case AMDGPU::INTERP_PAIR_ZW: |
| 40 | case AMDGPU::INTERP_VEC_LOAD: |
Vincent Lejeune | 519f21e | 2013-05-17 16:50:32 +0000 | [diff] [blame] | 41 | case AMDGPU::DOT_4: |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 42 | return 4; |
| 43 | case AMDGPU::KILL: |
| 44 | return 0; |
| 45 | default: |
| 46 | break; |
| 47 | } |
| 48 | |
| 49 | if(TII->isVector(*MI) || |
| 50 | TII->isCubeOp(MI->getOpcode()) || |
| 51 | TII->isReductionOp(MI->getOpcode())) |
| 52 | return 4; |
| 53 | |
| 54 | unsigned NumLiteral = 0; |
| 55 | for (MachineInstr::mop_iterator It = MI->operands_begin(), |
| 56 | E = MI->operands_end(); It != E; ++It) { |
| 57 | MachineOperand &MO = *It; |
| 58 | if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) |
| 59 | ++NumLiteral; |
| 60 | } |
| 61 | return 1 + NumLiteral; |
| 62 | } |
| 63 | |
| 64 | bool isALU(const MachineInstr *MI) const { |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 65 | if (TII->isALUInstr(MI->getOpcode())) |
| 66 | return true; |
| 67 | if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode())) |
| 68 | return true; |
| 69 | switch (MI->getOpcode()) { |
| 70 | case AMDGPU::PRED_X: |
| 71 | case AMDGPU::INTERP_PAIR_XY: |
| 72 | case AMDGPU::INTERP_PAIR_ZW: |
| 73 | case AMDGPU::INTERP_VEC_LOAD: |
| 74 | case AMDGPU::COPY: |
Vincent Lejeune | 519f21e | 2013-05-17 16:50:32 +0000 | [diff] [blame] | 75 | case AMDGPU::DOT_4: |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 76 | return true; |
| 77 | default: |
| 78 | return false; |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | bool IsTrivialInst(MachineInstr *MI) const { |
| 83 | switch (MI->getOpcode()) { |
| 84 | case AMDGPU::KILL: |
| 85 | case AMDGPU::RETURN: |
| 86 | return true; |
| 87 | default: |
| 88 | return false; |
| 89 | } |
| 90 | } |
| 91 | |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 92 | std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { |
| 93 | // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 |
| 94 | // (See also R600ISelLowering.cpp) |
| 95 | // ConstIndex value is in [0, 4095]; |
| 96 | return std::pair<unsigned, unsigned>( |
| 97 | ((Sel >> 2) - 512) >> 12, // KC_BANK |
| 98 | // Line Number of ConstIndex |
| 99 | // A line contains 16 constant registers however KCX bank can lock |
| 100 | // two line at the same time ; thus we want to get an even line number. |
| 101 | // Line number can be retrieved with (>>4), using (>>5) <<1 generates |
| 102 | // an even number. |
| 103 | ((((Sel >> 2) - 512) & 4095) >> 5) << 1); |
| 104 | } |
| 105 | |
| 106 | bool SubstituteKCacheBank(MachineInstr *MI, |
| 107 | std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const { |
| 108 | std::vector<std::pair<unsigned, unsigned> > UsedKCache; |
Vincent Lejeune | 0fca91d | 2013-05-17 16:50:02 +0000 | [diff] [blame] | 109 | const SmallVector<std::pair<MachineOperand *, int64_t>, 3> &Consts = |
| 110 | TII->getSrcs(MI); |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 111 | assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const"); |
| 112 | for (unsigned i = 0, n = Consts.size(); i < n; ++i) { |
Vincent Lejeune | 0fca91d | 2013-05-17 16:50:02 +0000 | [diff] [blame] | 113 | if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) |
| 114 | continue; |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 115 | unsigned Sel = Consts[i].second; |
| 116 | unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; |
| 117 | unsigned KCacheIndex = Index * 4 + Chan; |
| 118 | const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); |
| 119 | if (CachedConsts.empty()) { |
| 120 | CachedConsts.push_back(BankLine); |
| 121 | UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); |
| 122 | continue; |
| 123 | } |
| 124 | if (CachedConsts[0] == BankLine) { |
| 125 | UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); |
| 126 | continue; |
| 127 | } |
| 128 | if (CachedConsts.size() == 1) { |
| 129 | CachedConsts.push_back(BankLine); |
| 130 | UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); |
| 131 | continue; |
| 132 | } |
| 133 | if (CachedConsts[1] == BankLine) { |
| 134 | UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); |
| 135 | continue; |
| 136 | } |
| 137 | return false; |
| 138 | } |
| 139 | |
Vincent Lejeune | 0fca91d | 2013-05-17 16:50:02 +0000 | [diff] [blame] | 140 | for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) { |
| 141 | if (Consts[i].first->getReg() != AMDGPU::ALU_CONST) |
| 142 | continue; |
| 143 | switch(UsedKCache[j].first) { |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 144 | case 0: |
Vincent Lejeune | 0fca91d | 2013-05-17 16:50:02 +0000 | [diff] [blame] | 145 | Consts[i].first->setReg( |
| 146 | AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second)); |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 147 | break; |
| 148 | case 1: |
Vincent Lejeune | 0fca91d | 2013-05-17 16:50:02 +0000 | [diff] [blame] | 149 | Consts[i].first->setReg( |
| 150 | AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second)); |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 151 | break; |
| 152 | default: |
| 153 | llvm_unreachable("Wrong Cache Line"); |
| 154 | } |
Vincent Lejeune | 0fca91d | 2013-05-17 16:50:02 +0000 | [diff] [blame] | 155 | j++; |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 156 | } |
| 157 | return true; |
| 158 | } |
| 159 | |
| 160 | MachineBasicBlock::iterator |
| 161 | MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { |
| 162 | MachineBasicBlock::iterator ClauseHead = I; |
| 163 | std::vector<std::pair<unsigned, unsigned> > KCacheBanks; |
| 164 | bool PushBeforeModifier = false; |
| 165 | unsigned AluInstCount = 0; |
| 166 | for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { |
| 167 | if (IsTrivialInst(I)) |
| 168 | continue; |
| 169 | if (!isALU(I)) |
| 170 | break; |
Vincent Lejeune | c3d3f9b | 2013-04-03 18:24:47 +0000 | [diff] [blame] | 171 | if (AluInstCount > TII->getMaxAlusPerClause()) |
| 172 | break; |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 173 | if (I->getOpcode() == AMDGPU::PRED_X) { |
| 174 | if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) |
| 175 | PushBeforeModifier = true; |
| 176 | AluInstCount ++; |
| 177 | continue; |
| 178 | } |
Vincent Lejeune | 9931298 | 2013-04-03 16:24:04 +0000 | [diff] [blame] | 179 | if (I->getOpcode() == AMDGPU::KILLGT) { |
| 180 | I++; |
| 181 | break; |
| 182 | } |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 183 | if (TII->isALUInstr(I->getOpcode()) && |
| 184 | !SubstituteKCacheBank(I, KCacheBanks)) |
| 185 | break; |
| 186 | AluInstCount += OccupiedDwords(I); |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 187 | } |
| 188 | unsigned Opcode = PushBeforeModifier ? |
| 189 | AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; |
| 190 | BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) |
| 191 | .addImm(0) // ADDR |
| 192 | .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 |
| 193 | .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 |
| 194 | .addImm(KCacheBanks.empty()?0:2) // KM0 |
| 195 | .addImm((KCacheBanks.size() < 2)?0:2) // KM1 |
| 196 | .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 |
| 197 | .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 |
| 198 | .addImm(AluInstCount); // COUNT |
| 199 | return I; |
| 200 | } |
| 201 | |
| 202 | public: |
| 203 | R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), |
| 204 | TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } |
| 205 | |
| 206 | virtual bool runOnMachineFunction(MachineFunction &MF) { |
| 207 | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); |
| 208 | BB != BB_E; ++BB) { |
| 209 | MachineBasicBlock &MBB = *BB; |
| 210 | MachineBasicBlock::iterator I = MBB.begin(); |
| 211 | if (I->getOpcode() == AMDGPU::CF_ALU) |
| 212 | continue; // BB was already parsed |
| 213 | for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { |
| 214 | if (isALU(I)) |
| 215 | I = MakeALUClause(MBB, I); |
| 216 | else |
| 217 | ++I; |
| 218 | } |
| 219 | } |
| 220 | return false; |
| 221 | } |
| 222 | |
| 223 | const char *getPassName() const { |
| 224 | return "R600 Emit Clause Markers Pass"; |
| 225 | } |
| 226 | }; |
| 227 | |
| 228 | char R600EmitClauseMarkersPass::ID = 0; |
| 229 | |
Benjamin Kramer | d78bb46 | 2013-05-23 17:10:37 +0000 | [diff] [blame] | 230 | } // end anonymous namespace |
Vincent Lejeune | f43bc57 | 2013-04-01 21:47:42 +0000 | [diff] [blame] | 231 | |
| 232 | |
| 233 | llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) { |
| 234 | return new R600EmitClauseMarkersPass(TM); |
| 235 | } |
| 236 | |