blob: 7348b5b56c8b37da9212ea012b6aeb489e3c3d1a [file] [log] [blame]
//=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov the
// whole sequence is reverted.
//
// $old = ...
// $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
//                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
// $res = VALU $dpp_value [, src1]
//
// to
//
// $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
//                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
//
// Combining rules :
//
// if $row_mask and $bank_mask are fully enabled (0xF) and
//    $bound_ctrl==DPP_BOUND_ZERO or $old==0
// -> $combined_old = undef,
//    $combined_bound_ctrl = DPP_BOUND_ZERO
//
// if the VALU op is binary and
//    $bound_ctrl==DPP_BOUND_OFF and
//    $old==identity value (immediate) for the VALU op
// -> $combined_old = src1,
//    $combined_bound_ctrl = DPP_BOUND_OFF
//
// Otherwise cancel.
//
// The mov_dpp instruction should reside in the same BB as all its uses
//===----------------------------------------------------------------------===//
39
40#include "AMDGPU.h"
41#include "AMDGPUSubtarget.h"
42#include "SIInstrInfo.h"
43#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44#include "llvm/ADT/SmallVector.h"
45#include "llvm/ADT/Statistic.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineFunction.h"
48#include "llvm/CodeGen/MachineFunctionPass.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineOperand.h"
52#include "llvm/CodeGen/MachineRegisterInfo.h"
53#include "llvm/CodeGen/TargetRegisterInfo.h"
54#include "llvm/Pass.h"
55#include <cassert>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "gcn-dpp-combine"
60
61STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
62
63namespace {
64
65class GCNDPPCombine : public MachineFunctionPass {
66 MachineRegisterInfo *MRI;
67 const SIInstrInfo *TII;
68
69 using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
70
71 MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
72
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000073 MachineInstr *createDPPInst(MachineInstr &OrigMI,
74 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000075 RegSubRegPair CombOldVGPR,
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000076 MachineOperand *OldOpnd,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000077 bool CombBCZ) const;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000078
79 MachineInstr *createDPPInst(MachineInstr &OrigMI,
80 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +000081 RegSubRegPair CombOldVGPR,
82 bool CombBCZ) const;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +000083
84 bool hasNoImmOrEqual(MachineInstr &MI,
85 unsigned OpndName,
86 int64_t Value,
87 int64_t Mask = -1) const;
88
89 bool combineDPPMov(MachineInstr &MI) const;
90
91public:
92 static char ID;
93
94 GCNDPPCombine() : MachineFunctionPass(ID) {
95 initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
96 }
97
98 bool runOnMachineFunction(MachineFunction &MF) override;
99
100 StringRef getPassName() const override { return "GCN DPP Combine"; }
101
102 void getAnalysisUsage(AnalysisUsage &AU) const override {
103 AU.setPreservesCFG();
104 MachineFunctionPass::getAnalysisUsage(AU);
105 }
106};
107
108} // end anonymous namespace
109
110INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
111
112char GCNDPPCombine::ID = 0;
113
114char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
115
116FunctionPass *llvm::createGCNDPPCombinePass() {
117 return new GCNDPPCombine();
118}
119
120static int getDPPOp(unsigned Op) {
121 auto DPP32 = AMDGPU::getDPPOp32(Op);
122 if (DPP32 != -1)
123 return DPP32;
124
125 auto E32 = AMDGPU::getVOPe32(Op);
126 return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
127}
128
129// tracks the register operand definition and returns:
130// 1. immediate operand used to initialize the register if found
131// 2. nullptr if the register operand is undef
132// 3. the operand itself otherwise
133MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
134 auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
135 if (!Def)
136 return nullptr;
137
138 switch(Def->getOpcode()) {
139 default: break;
140 case AMDGPU::IMPLICIT_DEF:
141 return nullptr;
142 case AMDGPU::COPY:
143 case AMDGPU::V_MOV_B32_e32: {
144 auto &Op1 = Def->getOperand(1);
145 if (Op1.isImm())
146 return &Op1;
147 break;
148 }
149 }
150 return &OldOpnd;
151}
152
153MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
154 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000155 RegSubRegPair CombOldVGPR,
156 bool CombBCZ) const {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000157 assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
158 assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
159 TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
160
161 auto OrigOp = OrigMI.getOpcode();
162 auto DPPOp = getDPPOp(OrigOp);
163 if (DPPOp == -1) {
164 LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n");
165 return nullptr;
166 }
167
168 auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
169 OrigMI.getDebugLoc(), TII->get(DPPOp));
170 bool Fail = false;
171 do {
172 auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
173 assert(Dst);
174 DPPInst.add(*Dst);
175 int NumOperands = 1;
176
177 const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
178 if (OldIdx != -1) {
179 assert(OldIdx == NumOperands);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000180 assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
181 DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000182 ++NumOperands;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000183 } else {
184 // TODO: this discards MAC/FMA instructions for now, let's add it later
185 LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction,"
186 " TBD\n");
187 Fail = true;
188 break;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000189 }
190
191 if (auto *Mod0 = TII->getNamedOperand(OrigMI,
192 AMDGPU::OpName::src0_modifiers)) {
193 assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
194 AMDGPU::OpName::src0_modifiers));
195 assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
196 DPPInst.addImm(Mod0->getImm());
197 ++NumOperands;
198 }
199 auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
200 assert(Src0);
201 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
202 LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n");
203 Fail = true;
204 break;
205 }
206 DPPInst.add(*Src0);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000207 DPPInst->getOperand(NumOperands).setIsKill(false);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000208 ++NumOperands;
209
210 if (auto *Mod1 = TII->getNamedOperand(OrigMI,
211 AMDGPU::OpName::src1_modifiers)) {
212 assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
213 AMDGPU::OpName::src1_modifiers));
214 assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
215 DPPInst.addImm(Mod1->getImm());
216 ++NumOperands;
217 }
218 if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
219 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
220 LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n");
221 Fail = true;
222 break;
223 }
224 DPPInst.add(*Src1);
225 ++NumOperands;
226 }
227
228 if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
229 if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
230 LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n");
231 Fail = true;
232 break;
233 }
234 DPPInst.add(*Src2);
235 }
236
237 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
238 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
239 DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000240 DPPInst.addImm(CombBCZ ? 1 : 0);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000241 } while (false);
242
243 if (Fail) {
244 DPPInst.getInstr()->eraseFromParent();
245 return nullptr;
246 }
247 LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr());
248 return DPPInst.getInstr();
249}
250
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000251static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
252 assert(OldOpnd->isImm());
253 switch (OrigMIOp) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000254 default: break;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000255 case AMDGPU::V_ADD_U32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000256 case AMDGPU::V_ADD_U32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000257 case AMDGPU::V_ADD_I32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000258 case AMDGPU::V_ADD_I32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000259 case AMDGPU::V_OR_B32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000260 case AMDGPU::V_OR_B32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000261 case AMDGPU::V_SUBREV_U32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000262 case AMDGPU::V_SUBREV_U32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000263 case AMDGPU::V_SUBREV_I32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000264 case AMDGPU::V_SUBREV_I32_e64:
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000265 case AMDGPU::V_MAX_U32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000266 case AMDGPU::V_MAX_U32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000267 case AMDGPU::V_XOR_B32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000268 case AMDGPU::V_XOR_B32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000269 if (OldOpnd->getImm() == 0)
270 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000271 break;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000272 case AMDGPU::V_AND_B32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000273 case AMDGPU::V_AND_B32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000274 case AMDGPU::V_MIN_U32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000275 case AMDGPU::V_MIN_U32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000276 if (static_cast<uint32_t>(OldOpnd->getImm()) ==
277 std::numeric_limits<uint32_t>::max())
278 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000279 break;
280 case AMDGPU::V_MIN_I32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000281 case AMDGPU::V_MIN_I32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000282 if (static_cast<int32_t>(OldOpnd->getImm()) ==
283 std::numeric_limits<int32_t>::max())
284 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000285 break;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000286 case AMDGPU::V_MAX_I32_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000287 case AMDGPU::V_MAX_I32_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000288 if (static_cast<int32_t>(OldOpnd->getImm()) ==
289 std::numeric_limits<int32_t>::min())
290 return true;
291 break;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000292 case AMDGPU::V_MUL_I32_I24_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000293 case AMDGPU::V_MUL_I32_I24_e64:
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000294 case AMDGPU::V_MUL_U32_U24_e32:
Jay Foad7e0c10b2019-07-05 14:52:48 +0000295 case AMDGPU::V_MUL_U32_U24_e64:
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000296 if (OldOpnd->getImm() == 1)
297 return true;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000298 break;
299 }
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000300 return false;
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000301}
302
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000303MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
304 MachineInstr &MovMI,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000305 RegSubRegPair CombOldVGPR,
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000306 MachineOperand *OldOpndValue,
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000307 bool CombBCZ) const {
308 assert(CombOldVGPR.Reg);
309 if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
310 auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
311 if (!Src1 || !Src1->isReg()) {
312 LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n");
313 return nullptr;
314 }
315 if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
Jay Foad0cd50b22019-07-04 15:04:29 +0000316 LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n");
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000317 return nullptr;
318 }
319 CombOldVGPR = getRegSubRegPair(*Src1);
320 if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
321 LLVM_DEBUG(dbgs() << " failed: src1 isn't a VGPR32 register\n");
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000322 return nullptr;
323 }
324 }
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000325 return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000326}
327
328// returns true if MI doesn't have OpndName immediate operand or the
329// operand has Value
330bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
331 int64_t Value, int64_t Mask) const {
332 auto *Imm = TII->getNamedOperand(MI, OpndName);
333 if (!Imm)
334 return true;
335
336 assert(Imm->isImm());
337 return (Imm->getImm() & Mask) == Value;
338}
339
340bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
341 assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000342 LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
343
344 auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
345 assert(DstOpnd && DstOpnd->isReg());
346 auto DPPMovReg = DstOpnd->getReg();
Matt Arsenaultf39f3bd2019-06-18 12:48:36 +0000347 if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) {
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000348 LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
349 " for all uses\n");
350 return false;
351 }
352
353 auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
354 assert(RowMaskOpnd && RowMaskOpnd->isImm());
355 auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
356 assert(BankMaskOpnd && BankMaskOpnd->isImm());
357 const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
358 BankMaskOpnd->getImm() == 0xF;
359
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000360 auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
361 assert(BCZOpnd && BCZOpnd->isImm());
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000362 bool BoundCtrlZero = BCZOpnd->getImm();
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000363
364 auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
365 assert(OldOpnd && OldOpnd->isReg());
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000366
367 auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
368 // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
369 // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
370 // but the third option is used to distinguish undef from non-immediate
371 // to reuse IMPLICIT_DEF instruction later
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000372 assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000373
374 bool CombBCZ = false;
375
376 if (MaskAllLanes && BoundCtrlZero) { // [1]
377 CombBCZ = true;
378 } else {
379 if (!OldOpndValue || !OldOpndValue->isImm()) {
380 LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n");
381 return false;
382 }
383
384 if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
385 LLVM_DEBUG(dbgs() <<
386 " failed: old reg def and mov should be in the same BB\n");
387 return false;
388 }
389
390 if (OldOpndValue->getImm() == 0) {
391 if (MaskAllLanes) {
392 assert(!BoundCtrlZero); // by check [1]
393 CombBCZ = true;
Valery Pykhtin1e0b5c72019-01-09 13:43:32 +0000394 }
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000395 } else if (BoundCtrlZero) {
396 assert(!MaskAllLanes); // by check [1]
397 LLVM_DEBUG(dbgs() <<
398 " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
399 return false;
Valery Pykhtin1e0b5c72019-01-09 13:43:32 +0000400 }
Valery Pykhtinb7a45952019-01-09 15:21:53 +0000401 }
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000402
403 LLVM_DEBUG(dbgs() << " old=";
404 if (!OldOpndValue)
405 dbgs() << "undef";
406 else
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000407 dbgs() << *OldOpndValue;
408 dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000409
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000410 SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
411 auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
412 // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
413 if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
414 CombOldVGPR = RegSubRegPair(
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000415 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
416 auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000417 TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000418 DPPMIs.push_back(UndefInst.getInstr());
419 }
420
421 OrigMIs.push_back(&MovMI);
422 bool Rollback = true;
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000423 for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000424 Rollback = true;
425
426 auto &OrigMI = *Use.getParent();
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000427 LLVM_DEBUG(dbgs() << " try: " << OrigMI);
428
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000429 auto OrigOp = OrigMI.getOpcode();
430 if (TII->isVOP3(OrigOp)) {
431 if (!TII->hasVALU32BitEncoding(OrigOp)) {
432 LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
433 break;
434 }
435 // check if other than abs|neg modifiers are set (opsel for example)
436 const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
437 if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
438 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
439 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
440 !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
441 LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n");
442 break;
443 }
444 } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
445 LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n");
446 break;
447 }
448
449 LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
450 if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000451 if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
452 OldOpndValue, CombBCZ)) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000453 DPPMIs.push_back(DPPInst);
454 Rollback = false;
455 }
456 } else if (OrigMI.isCommutable() &&
457 &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
458 auto *BB = OrigMI.getParent();
459 auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
460 BB->insert(OrigMI, NewMI);
461 if (TII->commuteInstruction(*NewMI)) {
462 LLVM_DEBUG(dbgs() << " commuted: " << *NewMI);
Valery Pykhtin7fe97f82019-02-08 11:59:48 +0000463 if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
464 OldOpndValue, CombBCZ)) {
Valery Pykhtin3d9afa22018-11-30 14:21:56 +0000465 DPPMIs.push_back(DPPInst);
466 Rollback = false;
467 }
468 } else
469 LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n");
470 NewMI->eraseFromParent();
471 } else
472 LLVM_DEBUG(dbgs() << " failed: no suitable operands\n");
473 if (Rollback)
474 break;
475 OrigMIs.push_back(&OrigMI);
476 }
477
478 for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
479 MI->eraseFromParent();
480
481 return !Rollback;
482}
483
484bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
485 auto &ST = MF.getSubtarget<GCNSubtarget>();
486 if (!ST.hasDPP() || skipFunction(MF.getFunction()))
487 return false;
488
489 MRI = &MF.getRegInfo();
490 TII = ST.getInstrInfo();
491
492 assert(MRI->isSSA() && "Must be run on SSA");
493
494 bool Changed = false;
495 for (auto &MBB : MF) {
496 for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
497 auto &MI = *I++;
498 if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
499 Changed = true;
500 ++NumDPPMovsCombined;
501 }
502 }
503 }
504 return Changed;
505}