blob: 49c4d2223a0dc829c1c95a6619c0a578507e320c [file] [log] [blame]
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +00001//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
/// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
/// It needs to be run after IfCvt for best results.
14//===----------------------------------------------------------------------===//
15
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000016#include "AMDGPU.h"
Benjamin Kramer799003b2015-03-23 19:32:43 +000017#include "AMDGPUSubtarget.h"
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000018#include "R600Defines.h"
19#include "R600InstrInfo.h"
20#include "R600MachineFunctionInfo.h"
21#include "R600RegisterInfo.h"
Tom Stellard44b30b42018-05-22 02:03:23 +000022#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000023#include "llvm/CodeGen/MachineFunctionPass.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28
29using namespace llvm;
30
Chandler Carruth84e68b22014-04-22 02:41:26 +000031#define DEBUG_TYPE "r600mergeclause"
32
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000033namespace {
34
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000035static bool isCFAlu(const MachineInstr &MI) {
36 switch (MI.getOpcode()) {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000037 case AMDGPU::CF_ALU:
38 case AMDGPU::CF_ALU_PUSH_BEFORE:
39 return true;
40 default:
41 return false;
42 }
43}
44
45class R600ClauseMergePass : public MachineFunctionPass {
46
47private:
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000048 const R600InstrInfo *TII;
49
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000050 unsigned getCFAluSize(const MachineInstr &MI) const;
51 bool isCFAluEnabled(const MachineInstr &MI) const;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000052
53 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54 /// removed and their content affected to the previous alu clause.
Alp Tokercb402912014-01-24 17:20:08 +000055 /// This function parse instructions after CFAlu until it find a disabled
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000056 /// CFAlu and merge the content, or an enabled CFAlu.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000057 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000058
59 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60 /// it is the case.
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000061 bool mergeIfPossible(MachineInstr &RootCFAlu,
62 const MachineInstr &LatrCFAlu) const;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000063
64public:
Tom Stellarda2f57be2017-08-02 22:19:45 +000065 static char ID;
66
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +000067 R600ClauseMergePass() : MachineFunctionPass(ID) { }
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000068
Craig Topper5656db42014-04-29 07:57:24 +000069 bool runOnMachineFunction(MachineFunction &MF) override;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000070
Mehdi Amini117296c2016-10-01 02:56:57 +000071 StringRef getPassName() const override;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000072};
73
Tom Stellarda2f57be2017-08-02 22:19:45 +000074} // end anonymous namespace
75
76INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
77 "R600 Clause Merge", false, false)
78INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
79 "R600 Clause Merge", false, false)
80
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000081char R600ClauseMergePass::ID = 0;
82
Tom Stellarda2f57be2017-08-02 22:19:45 +000083char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
84
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000085unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000086 assert(isCFAlu(MI));
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000087 return MI
88 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
89 .getImm();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000090}
91
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000092bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000093 assert(isCFAlu(MI));
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000094 return MI
95 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
96 .getImm();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000097}
98
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +000099void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
100 MachineInstr &CFAlu) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000101 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000102 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000103 I++;
104 do {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000105 while (I != E && !isCFAlu(*I))
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000106 I++;
107 if (I == E)
108 return;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000109 MachineInstr &MI = *I++;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000110 if (isCFAluEnabled(MI))
111 break;
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000112 CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
113 MI.eraseFromParent();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000114 } while (I != E);
115}
116
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000117bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
118 const MachineInstr &LatrCFAlu) const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000119 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
120 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
121 unsigned RootInstCount = getCFAluSize(RootCFAlu),
122 LaterInstCount = getCFAluSize(LatrCFAlu);
123 unsigned CumuledInsts = RootInstCount + LaterInstCount;
124 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000125 LLVM_DEBUG(dbgs() << "Excess inst counts\n");
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000126 return false;
127 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000128 if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000129 return false;
130 // Is KCache Bank 0 compatible ?
131 int Mode0Idx =
132 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
133 int KBank0Idx =
134 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
135 int KBank0LineIdx =
136 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000137 if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
138 RootCFAlu.getOperand(Mode0Idx).getImm() &&
139 (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
140 RootCFAlu.getOperand(KBank0Idx).getImm() ||
141 LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
142 RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000143 LLVM_DEBUG(dbgs() << "Wrong KC0\n");
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000144 return false;
145 }
146 // Is KCache Bank 1 compatible ?
147 int Mode1Idx =
148 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
149 int KBank1Idx =
150 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
151 int KBank1LineIdx =
152 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000153 if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
154 RootCFAlu.getOperand(Mode1Idx).getImm() &&
155 (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
156 RootCFAlu.getOperand(KBank1Idx).getImm() ||
157 LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
158 RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
Nicola Zaghend34e60c2018-05-14 12:53:11 +0000159 LLVM_DEBUG(dbgs() << "Wrong KC0\n");
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000160 return false;
161 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000162 if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
163 RootCFAlu.getOperand(Mode0Idx).setImm(
164 LatrCFAlu.getOperand(Mode0Idx).getImm());
165 RootCFAlu.getOperand(KBank0Idx).setImm(
166 LatrCFAlu.getOperand(KBank0Idx).getImm());
167 RootCFAlu.getOperand(KBank0LineIdx)
168 .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000169 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000170 if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
171 RootCFAlu.getOperand(Mode1Idx).setImm(
172 LatrCFAlu.getOperand(Mode1Idx).getImm());
173 RootCFAlu.getOperand(KBank1Idx).setImm(
174 LatrCFAlu.getOperand(KBank1Idx).getImm());
175 RootCFAlu.getOperand(KBank1LineIdx)
176 .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000177 }
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000178 RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
179 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000180 return true;
181}
182
183bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
Matthias Braunf1caa282017-12-15 22:22:58 +0000184 if (skipFunction(MF.getFunction()))
Andrew Kaylor7de74af2016-04-25 22:23:44 +0000185 return false;
186
Matt Arsenault43e92fe2016-06-24 06:30:11 +0000187 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
188 TII = ST.getInstrInfo();
189
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000190 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
191 BB != BB_E; ++BB) {
192 MachineBasicBlock &MBB = *BB;
193 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
194 MachineBasicBlock::iterator LatestCFAlu = E;
195 while (I != E) {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000196 MachineInstr &MI = *I++;
197 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
198 TII->mustBeLastInClause(MI.getOpcode()))
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000199 LatestCFAlu = E;
200 if (!isCFAlu(MI))
201 continue;
202 cleanPotentialDisabledCFAlu(MI);
203
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000204 if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
205 MI.eraseFromParent();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000206 } else {
Duncan P. N. Exon Smith4d295112016-07-08 19:16:05 +0000207 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000208 LatestCFAlu = MI;
209 }
210 }
211 }
212 return false;
213}
214
Mehdi Amini117296c2016-10-01 02:56:57 +0000215StringRef R600ClauseMergePass::getPassName() const {
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000216 return "R600 Merge Clause Markers Pass";
217}
218
Francis Visoiu Mistrih8b617642017-05-18 17:21:13 +0000219llvm::FunctionPass *llvm::createR600ClauseMergePass() {
220 return new R600ClauseMergePass();
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000221}