blob: f07be0001fb830c471d9b00d63d13840381b59ec [file] [log] [blame]
//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
/// This pass merges consecutive CFAlus where applicable.
/// It needs to be called after IfCvt for best results.
//===----------------------------------------------------------------------===//
15
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000016#include "AMDGPU.h"
17#include "R600Defines.h"
18#include "R600InstrInfo.h"
19#include "R600MachineFunctionInfo.h"
20#include "R600RegisterInfo.h"
Eric Christopherd9134482014-08-04 21:25:23 +000021#include "AMDGPUSubtarget.h"
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000022#include "llvm/CodeGen/MachineFunctionPass.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/Support/Debug.h"
26#include "llvm/Support/raw_ostream.h"
27
28using namespace llvm;
29
Chandler Carruth84e68b22014-04-22 02:41:26 +000030#define DEBUG_TYPE "r600mergeclause"
31
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000032namespace {
33
34static bool isCFAlu(const MachineInstr *MI) {
35 switch (MI->getOpcode()) {
36 case AMDGPU::CF_ALU:
37 case AMDGPU::CF_ALU_PUSH_BEFORE:
38 return true;
39 default:
40 return false;
41 }
42}
43
44class R600ClauseMergePass : public MachineFunctionPass {
45
46private:
47 static char ID;
48 const R600InstrInfo *TII;
49
50 unsigned getCFAluSize(const MachineInstr *MI) const;
51 bool isCFAluEnabled(const MachineInstr *MI) const;
52
53 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
54 /// removed and their content affected to the previous alu clause.
Alp Tokercb402912014-01-24 17:20:08 +000055 /// This function parse instructions after CFAlu until it find a disabled
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000056 /// CFAlu and merge the content, or an enabled CFAlu.
57 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
58
59 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60 /// it is the case.
61 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
62 const;
63
64public:
65 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
66
Craig Topper5656db42014-04-29 07:57:24 +000067 bool runOnMachineFunction(MachineFunction &MF) override;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000068
Craig Topper5656db42014-04-29 07:57:24 +000069 const char *getPassName() const override;
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +000070};
71
72char R600ClauseMergePass::ID = 0;
73
74unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
75 assert(isCFAlu(MI));
76 return MI->getOperand(
77 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
78}
79
80bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
81 assert(isCFAlu(MI));
82 return MI->getOperand(
83 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
84}
85
86void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
87 const {
88 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
89 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
90 I++;
91 do {
92 while (I!= E && !isCFAlu(I))
93 I++;
94 if (I == E)
95 return;
96 MachineInstr *MI = I++;
97 if (isCFAluEnabled(MI))
98 break;
99 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
100 MI->eraseFromParent();
101 } while (I != E);
102}
103
104bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
105 const MachineInstr *LatrCFAlu) const {
106 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
107 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
108 unsigned RootInstCount = getCFAluSize(RootCFAlu),
109 LaterInstCount = getCFAluSize(LatrCFAlu);
110 unsigned CumuledInsts = RootInstCount + LaterInstCount;
111 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
112 DEBUG(dbgs() << "Excess inst counts\n");
113 return false;
114 }
115 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
116 return false;
117 // Is KCache Bank 0 compatible ?
118 int Mode0Idx =
119 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
120 int KBank0Idx =
121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
122 int KBank0LineIdx =
123 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
124 if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
125 RootCFAlu->getOperand(Mode0Idx).getImm() &&
126 (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
127 RootCFAlu->getOperand(KBank0Idx).getImm() ||
128 LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
129 RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
130 DEBUG(dbgs() << "Wrong KC0\n");
131 return false;
132 }
133 // Is KCache Bank 1 compatible ?
134 int Mode1Idx =
135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
136 int KBank1Idx =
137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
138 int KBank1LineIdx =
139 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
140 if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
141 RootCFAlu->getOperand(Mode1Idx).getImm() &&
142 (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
143 RootCFAlu->getOperand(KBank1Idx).getImm() ||
144 LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
145 RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
146 DEBUG(dbgs() << "Wrong KC0\n");
147 return false;
148 }
149 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
150 RootCFAlu->getOperand(Mode0Idx).setImm(
151 LatrCFAlu->getOperand(Mode0Idx).getImm());
152 RootCFAlu->getOperand(KBank0Idx).setImm(
153 LatrCFAlu->getOperand(KBank0Idx).getImm());
154 RootCFAlu->getOperand(KBank0LineIdx).setImm(
155 LatrCFAlu->getOperand(KBank0LineIdx).getImm());
156 }
157 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
158 RootCFAlu->getOperand(Mode1Idx).setImm(
159 LatrCFAlu->getOperand(Mode1Idx).getImm());
160 RootCFAlu->getOperand(KBank1Idx).setImm(
161 LatrCFAlu->getOperand(KBank1Idx).getImm());
162 RootCFAlu->getOperand(KBank1LineIdx).setImm(
163 LatrCFAlu->getOperand(KBank1LineIdx).getImm());
164 }
165 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
166 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
167 return true;
168}
169
170bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
Eric Christopherfc6de422014-08-05 02:39:49 +0000171 TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
Vincent Lejeunea4da6fb2013-10-01 19:32:58 +0000172 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
173 BB != BB_E; ++BB) {
174 MachineBasicBlock &MBB = *BB;
175 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
176 MachineBasicBlock::iterator LatestCFAlu = E;
177 while (I != E) {
178 MachineInstr *MI = I++;
179 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
180 TII->mustBeLastInClause(MI->getOpcode()))
181 LatestCFAlu = E;
182 if (!isCFAlu(MI))
183 continue;
184 cleanPotentialDisabledCFAlu(MI);
185
186 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
187 MI->eraseFromParent();
188 } else {
189 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
190 LatestCFAlu = MI;
191 }
192 }
193 }
194 return false;
195}
196
197const char *R600ClauseMergePass::getPassName() const {
198 return "R600 Merge Clause Markers Pass";
199}
200
201} // end anonymous namespace
202
203
204llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
205 return new R600ClauseMergePass(TM);
206}