blob: 0d8ccd088ec4e1291b9eb6408cf6a9dee7fb7844 [file] [log] [blame]
Vincent Lejeunef43bc572013-04-01 21:47:42 +00001//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
/// \file
/// Add CF_ALU. R600 ALU instructions are grouped in clauses which can hold
/// 128 ALU instructions; these instructions can access up to 4 prefetched
/// lines of 16 registers from constant buffers. Such ALU clauses are
/// initiated by CF_ALU instructions.
15//===----------------------------------------------------------------------===//
16
17#include "AMDGPU.h"
Chandler Carruth6bda14b2017-06-06 11:49:48 +000018#include "AMDGPUSubtarget.h"
Vincent Lejeunef43bc572013-04-01 21:47:42 +000019#include "R600Defines.h"
20#include "R600InstrInfo.h"
Vincent Lejeunef43bc572013-04-01 21:47:42 +000021#include "R600RegisterInfo.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000022#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/CodeGen/MachineBasicBlock.h"
25#include "llvm/CodeGen/MachineFunction.h"
Vincent Lejeunef43bc572013-04-01 21:47:42 +000026#include "llvm/CodeGen/MachineFunctionPass.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000027#include "llvm/CodeGen/MachineInstr.h"
Vincent Lejeunef43bc572013-04-01 21:47:42 +000028#include "llvm/CodeGen/MachineInstrBuilder.h"
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000029#include "llvm/CodeGen/MachineOperand.h"
30#include "llvm/Pass.h"
31#include "llvm/Support/ErrorHandling.h"
32#include <cassert>
33#include <cstdint>
34#include <utility>
35#include <vector>
Vincent Lejeunef43bc572013-04-01 21:47:42 +000036
Benjamin Kramerd78bb462013-05-23 17:10:37 +000037using namespace llvm;
38
Tom Stellard1de55822013-12-11 17:51:41 +000039namespace llvm {
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000040
Tom Stellard1de55822013-12-11 17:51:41 +000041 void initializeR600EmitClauseMarkersPass(PassRegistry&);
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000042
43} // end namespace llvm
Tom Stellard1de55822013-12-11 17:51:41 +000044
Benjamin Kramerd78bb462013-05-23 17:10:37 +000045namespace {
Vincent Lejeunef43bc572013-04-01 21:47:42 +000046
Tom Stellard1de55822013-12-11 17:51:41 +000047class R600EmitClauseMarkers : public MachineFunctionPass {
Vincent Lejeunef43bc572013-04-01 21:47:42 +000048private:
Eugene Zelenko734bb7b2017-01-20 17:52:16 +000049 const R600InstrInfo *TII = nullptr;
50 int Address = 0;
Vincent Lejeunef43bc572013-04-01 21:47:42 +000051
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000052 unsigned OccupiedDwords(MachineInstr &MI) const {
53 switch (MI.getOpcode()) {
Vincent Lejeunef43bc572013-04-01 21:47:42 +000054 case AMDGPU::INTERP_PAIR_XY:
55 case AMDGPU::INTERP_PAIR_ZW:
56 case AMDGPU::INTERP_VEC_LOAD:
Vincent Lejeune519f21e2013-05-17 16:50:32 +000057 case AMDGPU::DOT_4:
Vincent Lejeunef43bc572013-04-01 21:47:42 +000058 return 4;
59 case AMDGPU::KILL:
60 return 0;
61 default:
62 break;
63 }
64
Tom Stellard8f9fc202013-11-15 00:12:45 +000065 // These will be expanded to two ALU instructions in the
66 // ExpandSpecialInstructions pass.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000067 if (TII->isLDSRetInstr(MI.getOpcode()))
Tom Stellard8f9fc202013-11-15 00:12:45 +000068 return 2;
69
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000070 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()) ||
71 TII->isReductionOp(MI.getOpcode()))
Vincent Lejeunef43bc572013-04-01 21:47:42 +000072 return 4;
73
74 unsigned NumLiteral = 0;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000075 for (MachineInstr::mop_iterator It = MI.operands_begin(),
76 E = MI.operands_end();
77 It != E; ++It) {
Vincent Lejeunef43bc572013-04-01 21:47:42 +000078 MachineOperand &MO = *It;
79 if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
80 ++NumLiteral;
81 }
82 return 1 + NumLiteral;
83 }
84
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000085 bool isALU(const MachineInstr &MI) const {
86 if (TII->isALUInstr(MI.getOpcode()))
Vincent Lejeunef43bc572013-04-01 21:47:42 +000087 return true;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000088 if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
Vincent Lejeunef43bc572013-04-01 21:47:42 +000089 return true;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000090 switch (MI.getOpcode()) {
Vincent Lejeunef43bc572013-04-01 21:47:42 +000091 case AMDGPU::PRED_X:
92 case AMDGPU::INTERP_PAIR_XY:
93 case AMDGPU::INTERP_PAIR_ZW:
94 case AMDGPU::INTERP_VEC_LOAD:
95 case AMDGPU::COPY:
Vincent Lejeune519f21e2013-05-17 16:50:32 +000096 case AMDGPU::DOT_4:
Vincent Lejeunef43bc572013-04-01 21:47:42 +000097 return true;
98 default:
99 return false;
100 }
101 }
102
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000103 bool IsTrivialInst(MachineInstr &MI) const {
104 switch (MI.getOpcode()) {
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000105 case AMDGPU::KILL:
106 case AMDGPU::RETURN:
Tom Stellarded0ceec2013-10-10 17:11:12 +0000107 case AMDGPU::IMPLICIT_DEF:
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000108 return true;
109 default:
110 return false;
111 }
112 }
113
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000114 std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const {
115 // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2
116 // (See also R600ISelLowering.cpp)
117 // ConstIndex value is in [0, 4095];
118 return std::pair<unsigned, unsigned>(
119 ((Sel >> 2) - 512) >> 12, // KC_BANK
120 // Line Number of ConstIndex
121 // A line contains 16 constant registers however KCX bank can lock
122 // two line at the same time ; thus we want to get an even line number.
123 // Line number can be retrieved with (>>4), using (>>5) <<1 generates
124 // an even number.
125 ((((Sel >> 2) - 512) & 4095) >> 5) << 1);
126 }
127
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000128 bool
129 SubstituteKCacheBank(MachineInstr &MI,
130 std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
131 bool UpdateInstr = true) const {
Eugene Zelenko734bb7b2017-01-20 17:52:16 +0000132 std::vector<std::pair<unsigned, unsigned>> UsedKCache;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000133
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000134 if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
Tom Stellard8f9fc202013-11-15 00:12:45 +0000135 return true;
136
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000137 const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000138 TII->getSrcs(MI);
139 assert(
140 (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) &&
141 "Can't assign Const");
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000142 for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000143 if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
144 continue;
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000145 unsigned Sel = Consts[i].second;
146 unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
147 unsigned KCacheIndex = Index * 4 + Chan;
148 const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel);
149 if (CachedConsts.empty()) {
150 CachedConsts.push_back(BankLine);
151 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
152 continue;
153 }
154 if (CachedConsts[0] == BankLine) {
155 UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex));
156 continue;
157 }
158 if (CachedConsts.size() == 1) {
159 CachedConsts.push_back(BankLine);
160 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
161 continue;
162 }
163 if (CachedConsts[1] == BankLine) {
164 UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex));
165 continue;
166 }
167 return false;
168 }
169
Tom Stellard8f9fc202013-11-15 00:12:45 +0000170 if (!UpdateInstr)
171 return true;
172
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000173 for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
174 if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
175 continue;
176 switch(UsedKCache[j].first) {
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000177 case 0:
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000178 Consts[i].first->setReg(
179 AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000180 break;
181 case 1:
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000182 Consts[i].first->setReg(
183 AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000184 break;
185 default:
186 llvm_unreachable("Wrong Cache Line");
187 }
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000188 j++;
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000189 }
190 return true;
191 }
192
Tom Stellard8f9fc202013-11-15 00:12:45 +0000193 bool canClauseLocalKillFitInClause(
194 unsigned AluInstCount,
Eugene Zelenko734bb7b2017-01-20 17:52:16 +0000195 std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
Tom Stellard8f9fc202013-11-15 00:12:45 +0000196 MachineBasicBlock::iterator Def,
197 MachineBasicBlock::iterator BBEnd) {
198 const R600RegisterInfo &TRI = TII->getRegisterInfo();
Jan Vesely3ea17042017-03-06 20:10:05 +0000199 //TODO: change this to defs?
Tom Stellard8f9fc202013-11-15 00:12:45 +0000200 for (MachineInstr::const_mop_iterator
201 MOI = Def->operands_begin(),
202 MOE = Def->operands_end(); MOI != MOE; ++MOI) {
203 if (!MOI->isReg() || !MOI->isDef() ||
204 TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
205 continue;
206
207 // Def defines a clause local register, so check that its use will fit
208 // in the clause.
209 unsigned LastUseCount = 0;
210 for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000211 AluInstCount += OccupiedDwords(*UseI);
Tom Stellard8f9fc202013-11-15 00:12:45 +0000212 // Make sure we won't need to end the clause due to KCache limitations.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000213 if (!SubstituteKCacheBank(*UseI, KCacheBanks, false))
Tom Stellard8f9fc202013-11-15 00:12:45 +0000214 return false;
215
216 // We have reached the maximum instruction limit before finding the
217 // use that kills this register, so we cannot use this def in the
218 // current clause.
219 if (AluInstCount >= TII->getMaxAlusPerClause())
220 return false;
221
Jan Vesely3ea17042017-03-06 20:10:05 +0000222 // TODO: Is this true? kill flag appears to work OK below
Tom Stellard8f9fc202013-11-15 00:12:45 +0000223 // Register kill flags have been cleared by the time we get to this
224 // pass, but it is safe to assume that all uses of this register
225 // occur in the same basic block as its definition, because
226 // it is illegal for the scheduler to schedule them in
227 // different blocks.
Jan Vesely3ea17042017-03-06 20:10:05 +0000228 if (UseI->readsRegister(MOI->getReg()))
Tom Stellard8f9fc202013-11-15 00:12:45 +0000229 LastUseCount = AluInstCount;
230
Jan Vesely3ea17042017-03-06 20:10:05 +0000231 // Exit early if the current use kills the register
232 if (UseI != Def && UseI->killsRegister(MOI->getReg()))
Tom Stellard8f9fc202013-11-15 00:12:45 +0000233 break;
234 }
235 if (LastUseCount)
236 return LastUseCount <= TII->getMaxAlusPerClause();
237 llvm_unreachable("Clause local register live at end of clause.");
238 }
239 return true;
240 }
241
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000242 MachineBasicBlock::iterator
Vincent Lejeunece499742013-07-09 15:03:33 +0000243 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000244 MachineBasicBlock::iterator ClauseHead = I;
Eugene Zelenko734bb7b2017-01-20 17:52:16 +0000245 std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000246 bool PushBeforeModifier = false;
247 unsigned AluInstCount = 0;
248 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000249 if (IsTrivialInst(*I))
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000250 continue;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000251 if (!isALU(*I))
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000252 break;
Vincent Lejeunec3d3f9b2013-04-03 18:24:47 +0000253 if (AluInstCount > TII->getMaxAlusPerClause())
254 break;
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000255 if (I->getOpcode() == AMDGPU::PRED_X) {
Vincent Lejeune0b342d62013-10-01 19:32:49 +0000256 // We put PRED_X in its own clause to ensure that ifcvt won't create
257 // clauses with more than 128 insts.
258 // IfCvt is indeed checking that "then" and "else" branches of an if
259 // statement have less than ~60 insts thus converted clauses can't be
260 // bigger than ~121 insts (predicate setter needs to be in the same
261 // clause as predicated alus).
262 if (AluInstCount > 0)
263 break;
Duncan P. N. Exon Smith9cfc75c2016-06-30 00:01:54 +0000264 if (TII->getFlagOp(*I).getImm() & MO_FLAG_PUSH)
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000265 PushBeforeModifier = true;
266 AluInstCount ++;
267 continue;
268 }
Tom Stellardce540332013-06-28 15:46:59 +0000269 // XXX: GROUP_BARRIER instructions cannot be in the same ALU clause as:
270 //
271 // * KILL or INTERP instructions
272 // * Any instruction that sets UPDATE_EXEC_MASK or UPDATE_PRED bits
273 // * Uses waterfalling (i.e. INDEX_MODE = AR.X)
274 //
275 // XXX: These checks have not been implemented yet.
276 if (TII->mustBeLastInClause(I->getOpcode())) {
Vincent Lejeune99312982013-04-03 16:24:04 +0000277 I++;
278 break;
279 }
Tom Stellard8f9fc202013-11-15 00:12:45 +0000280
281 // If this instruction defines a clause local register, make sure
282 // its use can fit in this clause.
283 if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000284 break;
Tom Stellard8f9fc202013-11-15 00:12:45 +0000285
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000286 if (!SubstituteKCacheBank(*I, KCacheBanks))
Vincent Lejeunec6896792013-06-04 23:17:15 +0000287 break;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000288 AluInstCount += OccupiedDwords(*I);
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000289 }
290 unsigned Opcode = PushBeforeModifier ?
291 AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
292 BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
Vincent Lejeunece499742013-07-09 15:03:33 +0000293 // We don't use the ADDR field until R600ControlFlowFinalizer pass, where
294 // it is safe to assume it is 0. However if we always put 0 here, the ifcvt
Matt Arsenault37fefd62016-06-10 02:18:02 +0000295 // pass may assume that identical ALU clause starter at the beginning of a
Vincent Lejeunece499742013-07-09 15:03:33 +0000296 // true and false branch can be factorized which is not the case.
297 .addImm(Address++) // ADDR
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000298 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0
299 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1
300 .addImm(KCacheBanks.empty()?0:2) // KM0
301 .addImm((KCacheBanks.size() < 2)?0:2) // KM1
302 .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0
303 .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1
Vincent Lejeunece499742013-07-09 15:03:33 +0000304 .addImm(AluInstCount) // COUNT
305 .addImm(1); // Enabled
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000306 return I;
307 }
308
309public:
Tom Stellard1de55822013-12-11 17:51:41 +0000310 static char ID;
Tom Stellard1de55822013-12-11 17:51:41 +0000311
Eugene Zelenko734bb7b2017-01-20 17:52:16 +0000312 R600EmitClauseMarkers() : MachineFunctionPass(ID) {
Tom Stellard1de55822013-12-11 17:51:41 +0000313 initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
314 }
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000315
Craig Topper5656db42014-04-29 07:57:24 +0000316 bool runOnMachineFunction(MachineFunction &MF) override {
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000317 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
318 TII = ST.getInstrInfo();
Bill Wendling37e9adb2013-06-07 20:28:55 +0000319
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000320 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
321 BB != BB_E; ++BB) {
322 MachineBasicBlock &MBB = *BB;
323 MachineBasicBlock::iterator I = MBB.begin();
Hans Wennborg0dd9ed12016-08-13 01:12:49 +0000324 if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000325 continue; // BB was already parsed
326 for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
Jan Vesely4b1243f2017-02-18 04:24:10 +0000327 if (isALU(*I)) {
328 auto next = MakeALUClause(MBB, I);
329 assert(next != I);
330 I = next;
331 } else
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000332 ++I;
333 }
334 }
335 return false;
336 }
337
Mehdi Amini117296c2016-10-01 02:56:57 +0000338 StringRef getPassName() const override {
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000339 return "R600 Emit Clause Markers Pass";
340 }
341};
342
Tom Stellard1de55822013-12-11 17:51:41 +0000343char R600EmitClauseMarkers::ID = 0;
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000344
Benjamin Kramerd78bb462013-05-23 17:10:37 +0000345} // end anonymous namespace
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000346
Tom Stellard1de55822013-12-11 17:51:41 +0000347INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
348 "R600 Emit Clause Markters", false, false)
349INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
350 "R600 Emit Clause Markters", false, false)
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000351
Eugene Zelenko734bb7b2017-01-20 17:52:16 +0000352FunctionPass *llvm::createR600EmitClauseMarkers() {
Tom Stellard1de55822013-12-11 17:51:41 +0000353 return new R600EmitClauseMarkers();
Vincent Lejeunef43bc572013-04-01 21:47:42 +0000354}