blob: 6b42a7a9faf240f918d709d479f3fbbdfe47269d [file] [log] [blame]
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +00001//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
13//===----------------------------------------------------------------------===//
14
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +000015#define DEBUG_TYPE "r600cf"
16#include "llvm/Support/Debug.h"
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +000017#include "AMDGPU.h"
18#include "R600Defines.h"
19#include "R600InstrInfo.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
Benjamin Kramerd78bb462013-05-23 17:10:37 +000025#include "llvm/Support/raw_ostream.h"
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +000026
Benjamin Kramerd78bb462013-05-23 17:10:37 +000027using namespace llvm;
28
29namespace {
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +000030
Tom Stellarda40f9712014-01-22 21:55:43 +000031struct CFStack {
32
33 enum StackItem {
34 ENTRY = 0,
35 SUB_ENTRY = 1,
36 FIRST_NON_WQM_PUSH = 2,
37 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
38 };
39
40 const AMDGPUSubtarget &ST;
41 std::vector<StackItem> BranchStack;
42 std::vector<StackItem> LoopStack;
43 unsigned MaxStackSize;
44 unsigned CurrentEntries;
45 unsigned CurrentSubEntries;
46
47 CFStack(const AMDGPUSubtarget &st, unsigned ShaderType) : ST(st),
48 // We need to reserve a stack entry for CALL_FS in vertex shaders.
49 MaxStackSize(ShaderType == ShaderType::VERTEX ? 1 : 0),
50 CurrentEntries(0), CurrentSubEntries(0) { }
51
52 unsigned getLoopDepth();
53 bool branchStackContains(CFStack::StackItem);
54 bool requiresWorkAroundForInst(unsigned Opcode);
55 unsigned getSubEntrySize(CFStack::StackItem Item);
56 void updateMaxStackSize();
57 void pushBranch(unsigned Opcode, bool isWQM = false);
58 void pushLoop();
59 void popBranch();
60 void popLoop();
61};
62
63unsigned CFStack::getLoopDepth() {
64 return LoopStack.size();
65}
66
67bool CFStack::branchStackContains(CFStack::StackItem Item) {
68 for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
69 E = BranchStack.end(); I != E; ++I) {
70 if (*I == Item)
71 return true;
72 }
73 return false;
74}
75
76unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
77 switch(Item) {
78 default:
79 return 0;
80 case CFStack::FIRST_NON_WQM_PUSH:
81 assert(!ST.hasCaymanISA());
82 if (ST.getGeneration() <= AMDGPUSubtarget::R700) {
83 // +1 For the push operation.
84 // +2 Extra space required.
85 return 3;
86 } else {
87 // Some documentation says that this is not necessary on Evergreen,
88 // but experimentation has show that we need to allocate 1 extra
89 // sub-entry for the first non-WQM push.
90 // +1 For the push operation.
91 // +1 Extra space required.
92 return 2;
93 }
94 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
95 assert(ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
96 // +1 For the push operation.
97 // +1 Extra space required.
98 return 2;
99 case CFStack::SUB_ENTRY:
100 return 1;
101 }
102}
103
104void CFStack::updateMaxStackSize() {
105 unsigned CurrentStackSize = CurrentEntries +
106 (RoundUpToAlignment(CurrentSubEntries, 4) / 4);
107 MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
108}
109
110void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
111 CFStack::StackItem Item = CFStack::ENTRY;
112 switch(Opcode) {
113 case AMDGPU::CF_PUSH_EG:
114 case AMDGPU::CF_ALU_PUSH_BEFORE:
115 if (!isWQM) {
116 if (!ST.hasCaymanISA() && !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
117 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
118 // See comment in
119 // CFStack::getSubEntrySize()
120 else if (CurrentEntries > 0 &&
121 ST.getGeneration() > AMDGPUSubtarget::EVERGREEN &&
122 !ST.hasCaymanISA() &&
123 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
124 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
125 else
126 Item = CFStack::SUB_ENTRY;
127 } else
128 Item = CFStack::ENTRY;
129 break;
130 }
131 BranchStack.push_back(Item);
132 if (Item == CFStack::ENTRY)
133 CurrentEntries++;
134 else
135 CurrentSubEntries += getSubEntrySize(Item);
136 updateMaxStackSize();
137}
138
139void CFStack::pushLoop() {
140 LoopStack.push_back(CFStack::ENTRY);
141 CurrentEntries++;
142 updateMaxStackSize();
143}
144
145void CFStack::popBranch() {
146 CFStack::StackItem Top = BranchStack.back();
147 if (Top == CFStack::ENTRY)
148 CurrentEntries--;
149 else
150 CurrentSubEntries-= getSubEntrySize(Top);
151 BranchStack.pop_back();
152}
153
154void CFStack::popLoop() {
155 CurrentEntries--;
156 LoopStack.pop_back();
157}
158
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000159class R600ControlFlowFinalizer : public MachineFunctionPass {
160
161private:
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000162 typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
163
/// Generation-independent names for the native control-flow instructions;
/// getHWInstrDesc() maps each one to the opcode of the current subtarget.
enum ControlFlowInstruction {
  CF_TC,            ///< Heads a fetch clause whose first instruction uses the texture cache.
  CF_VC,            ///< Heads every other fetch clause.
  CF_CALL_FS,       ///< Emitted at the start of each block of a vertex shader.
  CF_WHILE_LOOP,    ///< Replaces the WHILELOOP pseudo.
  CF_END_LOOP,      ///< Replaces the ENDLOOP pseudo.
  CF_LOOP_BREAK,    ///< Replaces the BREAK pseudo.
  CF_LOOP_CONTINUE, ///< Replaces the CONTINUE pseudo.
  CF_JUMP,          ///< Replaces the IF_PREDICATE_SET pseudo.
  CF_ELSE,          ///< Replaces the ELSE pseudo.
  CF_POP,           ///< Emitted for an ENDIF that cannot be folded into an ALU clause.
  CF_END            ///< Replaces the RETURN pseudo.
};
NAKAMURA Takumi3b0853b2013-04-11 04:16:22 +0000177
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000178 static char ID;
179 const R600InstrInfo *TII;
Bill Wendling37e9adb2013-06-07 20:28:55 +0000180 const R600RegisterInfo *TRI;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000181 unsigned MaxFetchInst;
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000182 const AMDGPUSubtarget &ST;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000183
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000184 bool IsTrivialInst(MachineInstr *MI) const {
185 switch (MI->getOpcode()) {
186 case AMDGPU::KILL:
187 case AMDGPU::RETURN:
188 return true;
189 default:
190 return false;
191 }
192 }
193
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000194 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000195 unsigned Opcode = 0;
Tom Stellarda6c6e1b2013-06-07 20:37:48 +0000196 bool isEg = (ST.getGeneration() >= AMDGPUSubtarget::EVERGREEN);
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000197 switch (CFI) {
198 case CF_TC:
199 Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
200 break;
Vincent Lejeunec2991642013-04-30 00:13:39 +0000201 case CF_VC:
202 Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
203 break;
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000204 case CF_CALL_FS:
205 Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
206 break;
207 case CF_WHILE_LOOP:
208 Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
209 break;
210 case CF_END_LOOP:
211 Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
212 break;
213 case CF_LOOP_BREAK:
214 Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
215 break;
216 case CF_LOOP_CONTINUE:
217 Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
218 break;
219 case CF_JUMP:
220 Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
221 break;
222 case CF_ELSE:
223 Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
224 break;
225 case CF_POP:
226 Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
227 break;
228 case CF_END:
Tom Stellarda6c6e1b2013-06-07 20:37:48 +0000229 if (ST.hasCaymanISA()) {
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000230 Opcode = AMDGPU::CF_END_CM;
231 break;
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000232 }
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000233 Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
234 break;
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000235 }
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000236 assert (Opcode && "No opcode selected");
237 return TII->get(Opcode);
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000238 }
239
Vincent Lejeune7c395f72013-04-30 00:14:00 +0000240 bool isCompatibleWithClause(const MachineInstr *MI,
Vincent Lejeune4d143322013-06-07 23:30:26 +0000241 std::set<unsigned> &DstRegs) const {
Vincent Lejeune7c395f72013-04-30 00:14:00 +0000242 unsigned DstMI, SrcMI;
243 for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
244 E = MI->operands_end(); I != E; ++I) {
245 const MachineOperand &MO = *I;
246 if (!MO.isReg())
247 continue;
Tom Stellard1b086cb2013-05-23 18:26:42 +0000248 if (MO.isDef()) {
249 unsigned Reg = MO.getReg();
250 if (AMDGPU::R600_Reg128RegClass.contains(Reg))
251 DstMI = Reg;
252 else
Bill Wendling37e9adb2013-06-07 20:28:55 +0000253 DstMI = TRI->getMatchingSuperReg(Reg,
254 TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
Tom Stellard1b086cb2013-05-23 18:26:42 +0000255 &AMDGPU::R600_Reg128RegClass);
256 }
Vincent Lejeune7c395f72013-04-30 00:14:00 +0000257 if (MO.isUse()) {
258 unsigned Reg = MO.getReg();
259 if (AMDGPU::R600_Reg128RegClass.contains(Reg))
260 SrcMI = Reg;
261 else
Bill Wendling37e9adb2013-06-07 20:28:55 +0000262 SrcMI = TRI->getMatchingSuperReg(Reg,
263 TRI->getSubRegFromChannel(TRI->getHWRegChan(Reg)),
Vincent Lejeune7c395f72013-04-30 00:14:00 +0000264 &AMDGPU::R600_Reg128RegClass);
265 }
266 }
Vincent Lejeune4d143322013-06-07 23:30:26 +0000267 if ((DstRegs.find(SrcMI) == DstRegs.end())) {
Vincent Lejeune7c395f72013-04-30 00:14:00 +0000268 DstRegs.insert(DstMI);
269 return true;
270 } else
271 return false;
272 }
273
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000274 ClauseFile
275 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
276 const {
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000277 MachineBasicBlock::iterator ClauseHead = I;
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000278 std::vector<MachineInstr *> ClauseContent;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000279 unsigned AluInstCount = 0;
Vincent Lejeunec2991642013-04-30 00:13:39 +0000280 bool IsTex = TII->usesTextureCache(ClauseHead);
Vincent Lejeune4d143322013-06-07 23:30:26 +0000281 std::set<unsigned> DstRegs;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000282 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
283 if (IsTrivialInst(I))
284 continue;
Vincent Lejeunef9f4e1e2013-05-17 16:49:55 +0000285 if (AluInstCount >= MaxFetchInst)
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000286 break;
Vincent Lejeunec2991642013-04-30 00:13:39 +0000287 if ((IsTex && !TII->usesTextureCache(I)) ||
288 (!IsTex && !TII->usesVertexCache(I)))
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000289 break;
Vincent Lejeune4d143322013-06-07 23:30:26 +0000290 if (!isCompatibleWithClause(I, DstRegs))
Vincent Lejeune7c395f72013-04-30 00:14:00 +0000291 break;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000292 AluInstCount ++;
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000293 ClauseContent.push_back(I);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000294 }
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000295 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
Vincent Lejeunec2991642013-04-30 00:13:39 +0000296 getHWInstrDesc(IsTex?CF_TC:CF_VC))
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000297 .addImm(0) // ADDR
298 .addImm(AluInstCount - 1); // COUNT
299 return ClauseFile(MIb, ClauseContent);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000300 }
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000301
Vincent Lejeuneddd43382013-05-02 21:53:03 +0000302 void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
Craig Topper0afd0ab2013-07-15 06:39:13 +0000303 static const unsigned LiteralRegs[] = {
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000304 AMDGPU::ALU_LITERAL_X,
305 AMDGPU::ALU_LITERAL_Y,
306 AMDGPU::ALU_LITERAL_Z,
307 AMDGPU::ALU_LITERAL_W
308 };
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000309 const SmallVector<std::pair<MachineOperand *, int64_t>, 3 > Srcs =
310 TII->getSrcs(MI);
311 for (unsigned i = 0, e = Srcs.size(); i < e; ++i) {
312 if (Srcs[i].first->getReg() != AMDGPU::ALU_LITERAL_X)
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000313 continue;
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000314 int64_t Imm = Srcs[i].second;
Vincent Lejeuneddd43382013-05-02 21:53:03 +0000315 std::vector<int64_t>::iterator It =
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000316 std::find(Lits.begin(), Lits.end(), Imm);
317 if (It != Lits.end()) {
318 unsigned Index = It - Lits.begin();
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000319 Srcs[i].first->setReg(LiteralRegs[Index]);
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000320 } else {
321 assert(Lits.size() < 4 && "Too many literals in Instruction Group");
Vincent Lejeune0fca91d2013-05-17 16:50:02 +0000322 Srcs[i].first->setReg(LiteralRegs[Lits.size()]);
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000323 Lits.push_back(Imm);
324 }
325 }
326 }
327
328 MachineBasicBlock::iterator insertLiterals(
329 MachineBasicBlock::iterator InsertPos,
330 const std::vector<unsigned> &Literals) const {
331 MachineBasicBlock *MBB = InsertPos->getParent();
332 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
333 unsigned LiteralPair0 = Literals[i];
334 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
335 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
336 TII->get(AMDGPU::LITERALS))
337 .addImm(LiteralPair0)
338 .addImm(LiteralPair1);
339 }
340 return InsertPos;
341 }
342
343 ClauseFile
344 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
345 const {
346 MachineBasicBlock::iterator ClauseHead = I;
347 std::vector<MachineInstr *> ClauseContent;
348 I++;
349 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
350 if (IsTrivialInst(I)) {
351 ++I;
352 continue;
353 }
354 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
355 break;
Vincent Lejeuneddd43382013-05-02 21:53:03 +0000356 std::vector<int64_t> Literals;
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000357 if (I->isBundle()) {
358 MachineInstr *DeleteMI = I;
359 MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
360 while (++BI != E && BI->isBundledWithPred()) {
361 BI->unbundleFromPred();
362 for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
363 MachineOperand &MO = BI->getOperand(i);
364 if (MO.isReg() && MO.isInternalRead())
365 MO.setIsInternalRead(false);
366 }
367 getLiteral(BI, Literals);
368 ClauseContent.push_back(BI);
369 }
370 I = BI;
371 DeleteMI->eraseFromParent();
372 } else {
373 getLiteral(I, Literals);
374 ClauseContent.push_back(I);
375 I++;
376 }
377 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
378 unsigned literal0 = Literals[i];
379 unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
380 MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
381 TII->get(AMDGPU::LITERALS))
382 .addImm(literal0)
383 .addImm(literal2);
384 ClauseContent.push_back(MILit);
385 }
386 }
Vincent Lejeunece499742013-07-09 15:03:33 +0000387 assert(ClauseContent.size() < 128 && "ALU clause is too big");
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000388 ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
389 return ClauseFile(ClauseHead, ClauseContent);
390 }
391
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000392 void
393 EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
394 unsigned &CfCount) {
395 CounterPropagateAddr(Clause.first, CfCount);
396 MachineBasicBlock *BB = Clause.first->getParent();
397 BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
398 .addImm(CfCount);
399 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
400 BB->splice(InsertPos, BB, Clause.second[i]);
401 }
402 CfCount += 2 * Clause.second.size();
403 }
404
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000405 void
406 EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
407 unsigned &CfCount) {
Vincent Lejeunece499742013-07-09 15:03:33 +0000408 Clause.first->getOperand(0).setImm(0);
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000409 CounterPropagateAddr(Clause.first, CfCount);
410 MachineBasicBlock *BB = Clause.first->getParent();
411 BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
412 .addImm(CfCount);
413 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
414 BB->splice(InsertPos, BB, Clause.second[i]);
415 }
416 CfCount += Clause.second.size();
417 }
418
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000419 void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000420 MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000421 }
422 void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
423 const {
424 for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
425 It != E; ++It) {
426 MachineInstr *MI = *It;
427 CounterPropagateAddr(MI, Addr);
428 }
429 }
430
431public:
432 R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
Bill Wendling37e9adb2013-06-07 20:28:55 +0000433 TII (0), TRI(0),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000434 ST(tm.getSubtarget<AMDGPUSubtarget>()) {
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000435 const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
Vincent Lejeunef9f4e1e2013-05-17 16:49:55 +0000436 MaxFetchInst = ST.getTexVTXClauseSize();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000437 }
438
439 virtual bool runOnMachineFunction(MachineFunction &MF) {
Bill Wendling37e9adb2013-06-07 20:28:55 +0000440 TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
441 TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
Tom Stellarda40f9712014-01-22 21:55:43 +0000442 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
Bill Wendling37e9adb2013-06-07 20:28:55 +0000443
Tom Stellarda40f9712014-01-22 21:55:43 +0000444 CFStack CFStack(ST, MFI->ShaderType);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000445 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
446 ++MB) {
447 MachineBasicBlock &MBB = *MB;
448 unsigned CfCount = 0;
449 std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000450 std::vector<MachineInstr * > IfThenElseStack;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000451 if (MFI->ShaderType == 1) {
452 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000453 getHWInstrDesc(CF_CALL_FS));
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000454 CfCount++;
455 }
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000456 std::vector<ClauseFile> FetchClauses, AluClauses;
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000457 std::vector<MachineInstr *> LastAlu(1);
458 std::vector<MachineInstr *> ToPopAfter;
459
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000460 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
461 I != E;) {
Vincent Lejeunec2991642013-04-30 00:13:39 +0000462 if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000463 DEBUG(dbgs() << CfCount << ":"; I->dump(););
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000464 FetchClauses.push_back(MakeFetchClause(MBB, I));
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000465 CfCount++;
466 continue;
467 }
468
469 MachineBasicBlock::iterator MI = I;
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000470 if (MI->getOpcode() != AMDGPU::ENDIF)
471 LastAlu.back() = 0;
472 if (MI->getOpcode() == AMDGPU::CF_ALU)
473 LastAlu.back() = MI;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000474 I++;
475 switch (MI->getOpcode()) {
476 case AMDGPU::CF_ALU_PUSH_BEFORE:
Tom Stellarda40f9712014-01-22 21:55:43 +0000477 if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
Tom Stellardafbb6972014-01-22 21:55:41 +0000478 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
Vincent Lejeune4b8d9e32013-12-02 17:29:37 +0000479 .addImm(CfCount + 1)
480 .addImm(1);
481 MI->setDesc(TII->get(AMDGPU::CF_ALU));
482 CfCount++;
Tom Stellarda40f9712014-01-22 21:55:43 +0000483 CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
484 } else
485 CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
486
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000487 case AMDGPU::CF_ALU:
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000488 I = MI;
489 AluClauses.push_back(MakeALUClause(MBB, I));
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000490 DEBUG(dbgs() << CfCount << ":"; MI->dump(););
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000491 CfCount++;
492 break;
493 case AMDGPU::WHILELOOP: {
Tom Stellarda40f9712014-01-22 21:55:43 +0000494 CFStack.pushLoop();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000495 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000496 getHWInstrDesc(CF_WHILE_LOOP))
Vincent Lejeune04d9aa42013-04-10 13:29:20 +0000497 .addImm(1);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000498 std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
499 std::set<MachineInstr *>());
500 Pair.second.insert(MIb);
501 LoopStack.push_back(Pair);
502 MI->eraseFromParent();
503 CfCount++;
504 break;
505 }
506 case AMDGPU::ENDLOOP: {
Tom Stellarda40f9712014-01-22 21:55:43 +0000507 CFStack.popLoop();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000508 std::pair<unsigned, std::set<MachineInstr *> > Pair =
509 LoopStack.back();
510 LoopStack.pop_back();
511 CounterPropagateAddr(Pair.second, CfCount);
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000512 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000513 .addImm(Pair.first + 1);
514 MI->eraseFromParent();
515 CfCount++;
516 break;
517 }
518 case AMDGPU::IF_PREDICATE_SET: {
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000519 LastAlu.push_back(0);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000520 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000521 getHWInstrDesc(CF_JUMP))
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000522 .addImm(0)
523 .addImm(0);
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000524 IfThenElseStack.push_back(MIb);
525 DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000526 MI->eraseFromParent();
527 CfCount++;
528 break;
529 }
530 case AMDGPU::ELSE: {
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000531 MachineInstr * JumpInst = IfThenElseStack.back();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000532 IfThenElseStack.pop_back();
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000533 CounterPropagateAddr(JumpInst, CfCount);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000534 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000535 getHWInstrDesc(CF_ELSE))
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000536 .addImm(0)
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000537 .addImm(0);
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000538 DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
539 IfThenElseStack.push_back(MIb);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000540 MI->eraseFromParent();
541 CfCount++;
542 break;
543 }
544 case AMDGPU::ENDIF: {
Tom Stellarda40f9712014-01-22 21:55:43 +0000545 CFStack.popBranch();
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000546 if (LastAlu.back()) {
547 ToPopAfter.push_back(LastAlu.back());
548 } else {
549 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
550 getHWInstrDesc(CF_POP))
551 .addImm(CfCount + 1)
552 .addImm(1);
553 (void)MIb;
554 DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
555 CfCount++;
556 }
557
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000558 MachineInstr *IfOrElseInst = IfThenElseStack.back();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000559 IfThenElseStack.pop_back();
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000560 CounterPropagateAddr(IfOrElseInst, CfCount);
561 IfOrElseInst->getOperand(1).setImm(1);
562 LastAlu.pop_back();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000563 MI->eraseFromParent();
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000564 break;
565 }
Vincent Lejeune0c5ed2b2013-07-31 19:31:14 +0000566 case AMDGPU::BREAK: {
567 CfCount ++;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000568 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000569 getHWInstrDesc(CF_LOOP_BREAK))
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000570 .addImm(0);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000571 LoopStack.back().second.insert(MIb);
572 MI->eraseFromParent();
573 break;
574 }
575 case AMDGPU::CONTINUE: {
576 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
Vincent Lejeune5f11dd32013-04-08 13:05:49 +0000577 getHWInstrDesc(CF_LOOP_CONTINUE))
Vincent Lejeuneb6d6c0d2013-04-03 16:24:09 +0000578 .addImm(0);
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000579 LoopStack.back().second.insert(MIb);
580 MI->eraseFromParent();
581 CfCount++;
582 break;
583 }
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000584 case AMDGPU::RETURN: {
585 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
586 CfCount++;
587 MI->eraseFromParent();
588 if (CfCount % 2) {
589 BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
590 CfCount++;
591 }
Vincent Lejeune3f1d1362013-04-30 00:13:53 +0000592 for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
593 EmitFetchClause(I, FetchClauses[i], CfCount);
Vincent Lejeune3abdbf12013-04-30 00:14:38 +0000594 for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
595 EmitALUClause(I, AluClauses[i], CfCount);
Vincent Lejeuneb6bfe852013-04-23 17:34:00 +0000596 }
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000597 default:
Tom Stellard676c16d2013-08-16 01:11:51 +0000598 if (TII->isExport(MI->getOpcode())) {
599 DEBUG(dbgs() << CfCount << ":"; MI->dump(););
600 CfCount++;
601 }
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000602 break;
603 }
604 }
Vincent Lejeune8b8a7b52013-07-19 21:45:15 +0000605 for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
606 MachineInstr *Alu = ToPopAfter[i];
607 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
608 TII->get(AMDGPU::CF_ALU_POP_AFTER))
609 .addImm(Alu->getOperand(0).getImm())
610 .addImm(Alu->getOperand(1).getImm())
611 .addImm(Alu->getOperand(2).getImm())
612 .addImm(Alu->getOperand(3).getImm())
613 .addImm(Alu->getOperand(4).getImm())
614 .addImm(Alu->getOperand(5).getImm())
615 .addImm(Alu->getOperand(6).getImm())
616 .addImm(Alu->getOperand(7).getImm())
617 .addImm(Alu->getOperand(8).getImm());
618 Alu->eraseFromParent();
619 }
Tom Stellarda40f9712014-01-22 21:55:43 +0000620 MFI->StackSize = CFStack.MaxStackSize;
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000621 }
622
623 return false;
624 }
625
626 const char *getPassName() const {
627 return "R600 Control Flow Finalizer Pass";
628 }
629};
630
631char R600ControlFlowFinalizer::ID = 0;
632
Benjamin Kramerd78bb462013-05-23 17:10:37 +0000633} // end anonymous namespace
Vincent Lejeunebfaa63a62013-04-01 21:48:05 +0000634
635
636llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
637 return new R600ControlFlowFinalizer(TM);
638}