blob: 3ccde79e2df4db3b4bfcc5a426c0176acc184cce [file] [log] [blame]
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +00001//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
/// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
/// It needs to be called after IfCvt for best results.
14//===----------------------------------------------------------------------===//
15
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000016#include "AMDGPU.h"
Benjamin Kramer799003b2015-03-23 19:32:43 +000017#include "AMDGPUSubtarget.h"
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000018#include "R600Defines.h"
19#include "R600InstrInfo.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
22#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
Chandler Carruth84e68b22014-04-22 02:41:26 +000030#define DEBUG_TYPE "r600mergeclause"
31
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000032namespace {
33
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000034static bool isCFAlu(const MachineInstr &MI) {
35 switch (MI.getOpcode()) {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000036 case AMDGPU::CF_ALU:
37 case AMDGPU::CF_ALU_PUSH_BEFORE:
38 return true;
39 default:
40 return false;
41 }
42}
43
44class R600ClauseMergePass : public MachineFunctionPass {
45
46private:
47 static char ID;
48 const R600InstrInfo *TII;
49
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000050 unsigned getCFAluSize(const MachineInstr &MI) const;
51 bool isCFAluEnabled(const MachineInstr &MI) const;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000052
53 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54 /// removed and their content affected to the previous alu clause.
Alp Tokercb402912014-01-24 17:20:08 +000055 /// This function parse instructions after CFAlu until it find a disabled
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000056 /// CFAlu and merge the content, or an enabled CFAlu.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000057 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000058
59 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60 /// it is the case.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000061 bool mergeIfPossible(MachineInstr &RootCFAlu,
62 const MachineInstr &LatrCFAlu) const;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000063
64public:
65 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66
Craig Topper5656db42014-04-29 07:57:24 +000067 bool runOnMachineFunction(MachineFunction &MF) override;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000068
Craig Topper5656db42014-04-29 07:57:24 +000069 const char *getPassName() const override;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000070};
71
72char R600ClauseMergePass::ID = 0;
73
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000074unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000075 assert(isCFAlu(MI));
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000076 return MI
77 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
78 .getImm();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000079}
80
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000081bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000082 assert(isCFAlu(MI));
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000083 return MI
84 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
85 .getImm();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000086}
87
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000088void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
89 MachineInstr &CFAlu) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000090 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000091 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000092 I++;
93 do {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000094 while (I != E && !isCFAlu(*I))
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000095 I++;
96 if (I == E)
97 return;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000098 MachineInstr &MI = *I++;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000099 if (isCFAluEnabled(MI))
100 break;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000101 CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
102 MI.eraseFromParent();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000103 } while (I != E);
104}
105
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000106bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
107 const MachineInstr &LatrCFAlu) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000108 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
109 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
110 unsigned RootInstCount = getCFAluSize(RootCFAlu),
111 LaterInstCount = getCFAluSize(LatrCFAlu);
112 unsigned CumuledInsts = RootInstCount + LaterInstCount;
113 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
114 DEBUG(dbgs() << "Excess inst counts\n");
115 return false;
116 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000117 if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000118 return false;
119 // Is KCache Bank 0 compatible ?
120 int Mode0Idx =
121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
122 int KBank0Idx =
123 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
124 int KBank0LineIdx =
125 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000126 if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
127 RootCFAlu.getOperand(Mode0Idx).getImm() &&
128 (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
129 RootCFAlu.getOperand(KBank0Idx).getImm() ||
130 LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
131 RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000132 DEBUG(dbgs() << "Wrong KC0\n");
133 return false;
134 }
135 // Is KCache Bank 1 compatible ?
136 int Mode1Idx =
137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
138 int KBank1Idx =
139 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
140 int KBank1LineIdx =
141 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000142 if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
143 RootCFAlu.getOperand(Mode1Idx).getImm() &&
144 (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
145 RootCFAlu.getOperand(KBank1Idx).getImm() ||
146 LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
147 RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000148 DEBUG(dbgs() << "Wrong KC0\n");
149 return false;
150 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000151 if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
152 RootCFAlu.getOperand(Mode0Idx).setImm(
153 LatrCFAlu.getOperand(Mode0Idx).getImm());
154 RootCFAlu.getOperand(KBank0Idx).setImm(
155 LatrCFAlu.getOperand(KBank0Idx).getImm());
156 RootCFAlu.getOperand(KBank0LineIdx)
157 .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000158 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000159 if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
160 RootCFAlu.getOperand(Mode1Idx).setImm(
161 LatrCFAlu.getOperand(Mode1Idx).getImm());
162 RootCFAlu.getOperand(KBank1Idx).setImm(
163 LatrCFAlu.getOperand(KBank1Idx).getImm());
164 RootCFAlu.getOperand(KBank1LineIdx)
165 .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000166 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000167 RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
168 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000169 return true;
170}
171
172bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
Andrew Kaylor7de74af2016-04-25 22:23:44 +0000173 if (skipFunction(*MF.getFunction()))
174 return false;
175
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000176 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
177 TII = ST.getInstrInfo();
178
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000179 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180 BB != BB_E; ++BB) {
181 MachineBasicBlock &MBB = *BB;
182 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
183 MachineBasicBlock::iterator LatestCFAlu = E;
184 while (I != E) {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000185 MachineInstr &MI = *I++;
186 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187 TII->mustBeLastInClause(MI.getOpcode()))
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000188 LatestCFAlu = E;
189 if (!isCFAlu(MI))
190 continue;
191 cleanPotentialDisabledCFAlu(MI);
192
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000193 if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
194 MI.eraseFromParent();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000195 } else {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000196 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000197 LatestCFAlu = MI;
198 }
199 }
200 }
201 return false;
202}
203
204const char *R600ClauseMergePass::getPassName() const {
205 return "R600 Merge Clause Markers Pass";
206}
207
208} // end anonymous namespace
209
210
211llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
212 return new R600ClauseMergePass(TM);
213}