blob: 430cf480eb190973b3572ebf335ab5e1fe8311d0 [file] [log] [blame]
Valery Pykhtin3d9afa22018-11-30 14:21:56 +00001//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Valery Pykhtin3d9afa22018-11-30 14:21:56 +00006//
7//===----------------------------------------------------------------------===//
8// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
Valery Pykhtin7fe97f82019-02-08 11:59:48 +00009// operand. If any of the use instruction cannot be combined with the mov the
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000010// whole sequence is reverted.
11//
12// $old = ...
13// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000014// dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15// $res = VALU $dpp_value [, src1]
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000016//
17// to
18//
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000019// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20// dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000021//
22// Combining rules :
23//
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000024// if $row_mask and $bank_mask are fully enabled (0xF) and
25// $bound_ctrl==DPP_BOUND_ZERO or $old==0
26// -> $combined_old = undef,
27// $combined_bound_ctrl = DPP_BOUND_ZERO
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000028//
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000029// if the VALU op is binary and
30// $bound_ctrl==DPP_BOUND_OFF and
31// $old==identity value (immediate) for the VALU op
32// -> $combined_old = src1,
33// $combined_bound_ctrl = DPP_BOUND_OFF
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000034//
// Otherwise cancel.
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000036//
// The mov_dpp instruction should reside in the same BB as all its uses
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000038//===----------------------------------------------------------------------===//
39
40#include "AMDGPU.h"
41#include "AMDGPUSubtarget.h"
42#include "SIInstrInfo.h"
43#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44#include "llvm/ADT/SmallVector.h"
45#include "llvm/ADT/Statistic.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineFunction.h"
48#include "llvm/CodeGen/MachineFunctionPass.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineOperand.h"
52#include "llvm/CodeGen/MachineRegisterInfo.h"
53#include "llvm/CodeGen/TargetRegisterInfo.h"
54#include "llvm/Pass.h"
55#include <cassert>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "gcn-dpp-combine"
60
61STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
62
63namespace {
64
65class GCNDPPCombine : public MachineFunctionPass {
66 MachineRegisterInfo *MRI;
67 const SIInstrInfo *TII;
68
69 using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
70
71 MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
72
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000073 MachineInstr *createDPPInst(MachineInstr &OrigMI,
74 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000075 RegSubRegPair CombOldVGPR,
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000076 MachineOperand *OldOpnd,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000077 bool CombBCZ) const;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000078
79 MachineInstr *createDPPInst(MachineInstr &OrigMI,
80 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000081 RegSubRegPair CombOldVGPR,
82 bool CombBCZ) const;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000083
84 bool hasNoImmOrEqual(MachineInstr &MI,
85 unsigned OpndName,
86 int64_t Value,
87 int64_t Mask = -1) const;
88
89 bool combineDPPMov(MachineInstr &MI) const;
90
91public:
92 static char ID;
93
94 GCNDPPCombine() : MachineFunctionPass(ID) {
95 initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
96 }
97
98 bool runOnMachineFunction(MachineFunction &MF) override;
99
100 StringRef getPassName() const override { return "GCN DPP Combine"; }
101
102 void getAnalysisUsage(AnalysisUsage &AU) const override {
103 AU.setPreservesCFG();
104 MachineFunctionPass::getAnalysisUsage(AU);
105 }
106};
107
108} // end anonymous namespace
109
110INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
111
112char GCNDPPCombine::ID = 0;
113
114char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
115
116FunctionPass *llvm::createGCNDPPCombinePass() {
117 return new GCNDPPCombine();
118}
119
120static int getDPPOp(unsigned Op) {
121 auto DPP32 = AMDGPU::getDPPOp32(Op);
122 if (DPP32 != -1)
123 return DPP32;
124
125 auto E32 = AMDGPU::getVOPe32(Op);
126 return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
127}
128
129// tracks the register operand definition and returns:
130// 1. immediate operand used to initialize the register if found
131// 2. nullptr if the register operand is undef
132// 3. the operand itself otherwise
133MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
134 auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
135 if (!Def)
136 return nullptr;
137
138 switch(Def->getOpcode()) {
139 default: break;
140 case AMDGPU::IMPLICIT_DEF:
141 return nullptr;
142 case AMDGPU::COPY:
143 case AMDGPU::V_MOV_B32_e32: {
144 auto &Op1 = Def->getOperand(1);
145 if (Op1.isImm())
146 return &Op1;
147 break;
148 }
149 }
150 return &OldOpnd;
151}
152
153MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
154 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000155 RegSubRegPair CombOldVGPR,
156 bool CombBCZ) const {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000157 assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
158 assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
159 TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
160
161 auto OrigOp = OrigMI.getOpcode();
162 auto DPPOp = getDPPOp(OrigOp);
163 if (DPPOp == -1) {
164 LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
165 return nullptr;
166 }
167
168 auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
169 OrigMI.getDebugLoc(), TII->get(DPPOp));
170 bool Fail = false;
171 do {
172 auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
173 assert(Dst);
174 DPPInst.add(*Dst);
175 int NumOperands = 1;
176
177 const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
178 if (OldIdx != -1) {
179 assert(OldIdx == NumOperands);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000180 assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
181 DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000182 ++NumOperands;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000183 } else {
184 // TODO: this discards MAC/FMA instructions for now, let's add it later
185 LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
186 " TBD\n");
187 Fail = true;
188 break;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000189 }
190
191 if (auto *Mod0 = TII->getNamedOperand(OrigMI,
192 AMDGPU::OpName::src0_modifiers)) {
193 assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
194 AMDGPU::OpName::src0_modifiers));
195 assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
196 DPPInst.addImm(Mod0->getImm());
197 ++NumOperands;
198 }
199 auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
200 assert(Src0);
201 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
202 LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
203 Fail = true;
204 break;
205 }
206 DPPInst.add(*Src0);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000207 DPPInst->getOperand(NumOperands).setIsKill(false);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000208 ++NumOperands;
209
210 if (auto *Mod1 = TII->getNamedOperand(OrigMI,
211 AMDGPU::OpName::src1_modifiers)) {
212 assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
213 AMDGPU::OpName::src1_modifiers));
214 assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
215 DPPInst.addImm(Mod1->getImm());
216 ++NumOperands;
217 }
218 if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
219 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
220 LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
221 Fail = true;
222 break;
223 }
224 DPPInst.add(*Src1);
225 ++NumOperands;
226 }
227
228 if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
229 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
230 LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
231 Fail = true;
232 break;
233 }
234 DPPInst.add(*Src2);
235 }
236
237 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
238 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
239 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000240 DPPInst.addImm(CombBCZ ? 1 : 0);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000241 } while (false);
242
243 if (Fail) {
244 DPPInst.getInstr()->eraseFromParent();
245 return nullptr;
246 }
247 LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
248 return DPPInst.getInstr();
249}
250
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000251static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
252 assert(OldOpnd->isImm());
253 switch (OrigMIOp) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000254 default: break;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000255 case AMDGPU::V_ADD_U32_e32:
256 case AMDGPU::V_ADD_I32_e32:
257 case AMDGPU::V_OR_B32_e32:
258 case AMDGPU::V_SUBREV_U32_e32:
259 case AMDGPU::V_SUBREV_I32_e32:
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000260 case AMDGPU::V_MAX_U32_e32:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000261 case AMDGPU::V_XOR_B32_e32:
262 if (OldOpnd->getImm() == 0)
263 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000264 break;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000265 case AMDGPU::V_AND_B32_e32:
266 case AMDGPU::V_MIN_U32_e32:
267 if (static_cast<uint32_t>(OldOpnd->getImm()) ==
268 std::numeric_limits<uint32_t>::max())
269 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000270 break;
271 case AMDGPU::V_MIN_I32_e32:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000272 if (static_cast<int32_t>(OldOpnd->getImm()) ==
273 std::numeric_limits<int32_t>::max())
274 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000275 break;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000276 case AMDGPU::V_MAX_I32_e32:
277 if (static_cast<int32_t>(OldOpnd->getImm()) ==
278 std::numeric_limits<int32_t>::min())
279 return true;
280 break;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000281 case AMDGPU::V_MUL_I32_I24_e32:
282 case AMDGPU::V_MUL_U32_U24_e32:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000283 if (OldOpnd->getImm() == 1)
284 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000285 break;
286 }
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000287 return false;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000288}
289
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000290MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
291 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000292 RegSubRegPair CombOldVGPR,
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000293 MachineOperand *OldOpndValue,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000294 bool CombBCZ) const {
295 assert(CombOldVGPR.Reg);
296 if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
297 auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
298 if (!Src1 || !Src1->isReg()) {
299 LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
300 return nullptr;
301 }
302 if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
303 LLVM_DEBUG(dbgs() << " failed: old immediate ins't an identity\n");
304 return nullptr;
305 }
306 CombOldVGPR = getRegSubRegPair(*Src1);
307 if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
308 LLVM_DEBUG(dbgs() << " failed: src1 isn't a VGPR32 register\n");
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000309 return nullptr;
310 }
311 }
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000312 return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000313}
314
315// returns true if MI doesn't have OpndName immediate operand or the
316// operand has Value
317bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
318 int64_t Value, int64_t Mask) const {
319 auto *Imm = TII->getNamedOperand(MI, OpndName);
320 if (!Imm)
321 return true;
322
323 assert(Imm->isImm());
324 return (Imm->getImm() & Mask) == Value;
325}
326
327bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
328 assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000329 LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
330
331 auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
332 assert(DstOpnd && DstOpnd->isReg());
333 auto DPPMovReg = DstOpnd->getReg();
334 if (!isEXECMaskConstantBetweenDefAndUses(DPPMovReg, *MRI)) {
335 LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
336 " for all uses\n");
337 return false;
338 }
339
340 auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
341 assert(RowMaskOpnd && RowMaskOpnd->isImm());
342 auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
343 assert(BankMaskOpnd && BankMaskOpnd->isImm());
344 const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
345 BankMaskOpnd->getImm() == 0xF;
346
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000347 auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
348 assert(BCZOpnd && BCZOpnd->isImm());
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000349 bool BoundCtrlZero = BCZOpnd->getImm();
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000350
351 auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
352 assert(OldOpnd && OldOpnd->isReg());
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000353
354 auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
355 // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
356 // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
357 // but the third option is used to distinguish undef from non-immediate
358 // to reuse IMPLICIT_DEF instruction later
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000359 assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000360
361 bool CombBCZ = false;
362
363 if (MaskAllLanes && BoundCtrlZero) { // [1]
364 CombBCZ = true;
365 } else {
366 if (!OldOpndValue || !OldOpndValue->isImm()) {
367 LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
368 return false;
369 }
370
371 if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
372 LLVM_DEBUG(dbgs() <<
373 " failed: old reg def and mov should be in the same BB\n");
374 return false;
375 }
376
377 if (OldOpndValue->getImm() == 0) {
378 if (MaskAllLanes) {
379 assert(!BoundCtrlZero); // by check [1]
380 CombBCZ = true;
Valery Pykhtin1e0b5c72019-01-09 13:43:32 +0000381 }
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000382 } else if (BoundCtrlZero) {
383 assert(!MaskAllLanes); // by check [1]
384 LLVM_DEBUG(dbgs() <<
385 " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
386 return false;
Valery Pykhtin1e0b5c72019-01-09 13:43:32 +0000387 }
Valery Pykhtinb7a45952019-01-09 15:21:53 +0000388 }
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000389
390 LLVM_DEBUG(dbgs() << " old=";
391 if (!OldOpndValue)
392 dbgs() << "undef";
393 else
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000394 dbgs() << *OldOpndValue;
395 dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000396
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000397 SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
398 auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
399 // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
400 if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
401 CombOldVGPR = RegSubRegPair(
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000402 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
403 auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000404 TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000405 DPPMIs.push_back(UndefInst.getInstr());
406 }
407
408 OrigMIs.push_back(&MovMI);
409 bool Rollback = true;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000410 for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000411 Rollback = true;
412
413 auto &OrigMI = *Use.getParent();
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000414 LLVM_DEBUG(dbgs() << " try: " << OrigMI);
415
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000416 auto OrigOp = OrigMI.getOpcode();
417 if (TII->isVOP3(OrigOp)) {
418 if (!TII->hasVALU32BitEncoding(OrigOp)) {
419 LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
420 break;
421 }
422 // check if other than abs|neg modifiers are set (opsel for example)
423 const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
424 if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
425 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
426 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
427 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
428 LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
429 break;
430 }
431 } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
432 LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
433 break;
434 }
435
436 LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
437 if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000438 if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
439 OldOpndValue, CombBCZ)) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000440 DPPMIs.push_back(DPPInst);
441 Rollback = false;
442 }
443 } else if (OrigMI.isCommutable() &&
444 &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
445 auto *BB = OrigMI.getParent();
446 auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
447 BB->insert(OrigMI, NewMI);
448 if (TII->commuteInstruction(*NewMI)) {
449 LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000450 if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
451 OldOpndValue, CombBCZ)) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000452 DPPMIs.push_back(DPPInst);
453 Rollback = false;
454 }
455 } else
456 LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
457 NewMI->eraseFromParent();
458 } else
459 LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
460 if (Rollback)
461 break;
462 OrigMIs.push_back(&OrigMI);
463 }
464
465 for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
466 MI->eraseFromParent();
467
468 return !Rollback;
469}
470
471bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
472 auto &ST = MF.getSubtarget<GCNSubtarget>();
473 if (!ST.hasDPP() || skipFunction(MF.getFunction()))
474 return false;
475
476 MRI = &MF.getRegInfo();
477 TII = ST.getInstrInfo();
478
479 assert(MRI->isSSA() && "Must be run on SSA");
480
481 bool Changed = false;
482 for (auto &MBB : MF) {
483 for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
484 auto &MI = *I++;
485 if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
486 Changed = true;
487 ++NumDPPMovsCombined;
488 }
489 }
490 }
491 return Changed;
492}