//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass tries to apply several peephole SDWA patterns.
///
/// E.g. original:
///   V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
///   V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
///   V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
///
/// Replace:
///   V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>

using namespace llvm;

#define DEBUG_TYPE "si-peephole-sdwa"

STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
STATISTIC(NumSDWAInstructionsPeepholed,
          "Number of instructions converted to SDWA.");

namespace {

class SDWAOperand;

class SIPeepholeSDWA : public MachineFunctionPass {
public:
  using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;

private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const SIInstrInfo *TII;

  std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
  std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
  SmallVector<MachineInstr *, 8> ConvertedInstructions;

  Optional<int64_t> foldToImm(const MachineOperand &Op) const;

public:
  static char ID;

  SIPeepholeSDWA() : MachineFunctionPass(ID) {
    initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void matchSDWAOperands(MachineFunction &MF);
  bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
  void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;

  StringRef getPassName() const override { return "SI Peephole SDWA"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

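// Helper describing one matched SDWA pattern: Target is the operand that the
// converted SDWA instruction will use directly, while Replaced is the
// intermediate operand (a def or use of the matched shift/and/bfe) that the
// conversion eliminates.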
class SDWAOperand {
private:
  MachineOperand *Target; // Operand that will be used in the converted instruction
  MachineOperand *Replaced; // Operand that will be replaced by Target

public:
  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    assert(Target->isReg());
    assert(Replaced->isReg());
  }

  virtual ~SDWAOperand() = default;

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;

  MachineOperand *getTargetOperand() const { return Target; }
  MachineOperand *getReplacedOperand() const { return Replaced; }
  MachineInstr *getParentInst() const { return Target->getParent(); }

  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
  }
};

using namespace AMDGPU::SDWA;

class SDWASrcOperand : public SDWAOperand {
private:
  SdwaSel SrcSel;
  bool Abs;
  bool Neg;
  bool Sext;

public:
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
                 bool Sext_ = false)
      : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
        Neg(Neg_), Sext(Sext_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }

  uint64_t getSrcMods(const SIInstrInfo *TII,
                      const MachineOperand *SrcOp) const;
};

class SDWADstOperand : public SDWAOperand {
private:
  SdwaSel DstSel;
  DstUnused DstUn;

public:
  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }
};

} // end anonymous namespace

INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)

char SIPeepholeSDWA::ID = 0;

char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;

FunctionPass *llvm::createSIPeepholeSDWAPass() {
  return new SIPeepholeSDWA();
}

#ifndef NDEBUG
static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
  switch(Sel) {
  case BYTE_0: OS << "BYTE_0"; break;
  case BYTE_1: OS << "BYTE_1"; break;
  case BYTE_2: OS << "BYTE_2"; break;
  case BYTE_3: OS << "BYTE_3"; break;
  case WORD_0: OS << "WORD_0"; break;
  case WORD_1: OS << "WORD_1"; break;
  case DWORD: OS << "DWORD"; break;
  }
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
  switch(Un) {
  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
  }
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) {
  OS << "SDWA src: " << *Src.getTargetOperand()
     << " src_sel:" << Src.getSrcSel()
     << " abs:" << Src.getAbs() << " neg:" << Src.getNeg()
     << " sext:" << Src.getSext() << '\n';
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
  OS << "SDWA dst: " << *Dst.getTargetOperand()
     << " dst_sel:" << Dst.getDstSel()
     << " dst_unused:" << Dst.getDstUnused() << '\n';
  return OS;
}
#endif

static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
  assert(To.isReg() && From.isReg());
  To.setReg(From.getReg());
  To.setSubReg(From.getSubReg());
  To.setIsUndef(From.isUndef());
  if (To.isUse()) {
    To.setIsKill(From.isKill());
  } else {
    To.setIsDead(From.isDead());
  }
}

static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
  return LHS.isReg() &&
         RHS.isReg() &&
         LHS.getReg() == RHS.getReg() &&
         LHS.getSubReg() == RHS.getSubReg();
}

static bool isSubregOf(const MachineOperand &SubReg,
                       const MachineOperand &SuperReg,
                       const TargetRegisterInfo *TRI) {

  if (!SuperReg.isReg() || !SubReg.isReg())
    return false;

  if (isSameReg(SuperReg, SubReg))
    return true;

  if (SuperReg.getReg() != SubReg.getReg())
    return false;

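  // Same register, different subregister indices: SubReg lies inside SuperReg
  // iff every lane covered by SubReg's index is also covered by SuperReg's
  // index, i.e. (SuperMask | ~SubMask) has all lanes set.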
  LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg());
  LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg());
  SuperMask |= ~SubMask;
  return SuperMask.all();
}

uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
                                    const MachineOperand *SrcOp) const {
  uint64_t Mods = 0;
  const auto *MI = SrcOp->getParent();
  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      Mods = Mod->getImm();
    }
  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
      Mods = Mod->getImm();
    }
  }
  if (Abs || Neg) {
    assert(!Sext &&
           "Float and integer src modifiers can't be set simultaneously");
    Mods |= Abs ? SISrcMods::ABS : 0;
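    // XOR rather than OR so that a folded negation composes with a NEG
    // modifier already present on the operand (two negations cancel out).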
    Mods ^= Neg ? SISrcMods::NEG : 0;
  } else if (Sext) {
    Mods |= SISrcMods::SEXT;
  }

  return Mods;
}

MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA src operand the potential instruction is one that uses the
  // register defined by the parent instruction.
  MachineRegisterInfo *MRI = getMRI();
  MachineOperand *Replaced = getReplacedOperand();
  assert(Replaced->isReg());

  MachineInstr *PotentialMI = nullptr;
  for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) {
    // If this is a use of another subreg of the dst reg then do nothing.
    if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
      continue;

    // If there exists a use of a superreg of the dst then we should not
    // combine this operand.
    if (!isSameReg(PotentialMO, *Replaced))
      return nullptr;

    // Check that PotentialMI is the only instruction that uses the dst reg.
    if (PotentialMI == nullptr) {
      PotentialMI = PotentialMO.getParent();
    } else if (PotentialMI != PotentialMO.getParent()) {
      return nullptr;
    }
  }

  return PotentialMI;
}

bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Find the operand in the instruction that matches the source operand and
  // replace it with the target operand. Set the corresponding src_sel.

  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && (Src->isReg() || Src->isImm()));
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it should be src1.
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

    assert(Src && Src->isReg());

    if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
         MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
        !isSameReg(*Src, *getReplacedOperand())) {
      // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
      // src2. This is not allowed.
      return false;
    }

    assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
  }
  copyRegOperand(*Src, *getTargetOperand());
  SrcSel->setImm(getSrcSel());
  SrcMods->setImm(getSrcMods(TII, Src));
  getTargetOperand()->setIsKill(false);
  return true;
}

MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA dst operand the potential instruction is one that defines the
  // register that this operand uses.
  MachineRegisterInfo *MRI = getMRI();
  MachineInstr *ParentMI = getParentInst();
  MachineOperand *Replaced = getReplacedOperand();
  assert(Replaced->isReg());

  for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) {
    if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
      continue;

    if (!isSameReg(*Replaced, PotentialMO))
      return nullptr;

    // Check that ParentMI is the only instruction that uses the replaced
    // register.
    for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) {
      if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) &&
          UseMO.getParent() != ParentMI) {
        return nullptr;
      }
    }

    // Due to SSA this should be the only def of the replaced register, so
    // return it.
    return PotentialMO.getParent();
  }

  return nullptr;
}

bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Replace the vdst operand in MI with the target operand. Set dst_sel and
  // dst_unused.

  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      getDstSel() != AMDGPU::SDWA::DWORD) {
    // v_mac_f16/32_sdwa allow dst_sel to be DWORD only.
    return false;
  }

  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  assert(Operand &&
         Operand->isReg() &&
         isSameReg(*Operand, *getReplacedOperand()));
  copyRegOperand(*Operand, *getTargetOperand());
  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
  assert(DstSel);
  DstSel->setImm(getDstSel());
  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
  assert(DstUnused);
  DstUnused->setImm(getDstUnused());

  // Remove the original instruction because it would conflict with our new
  // instruction by register definition.
  getParentInst()->eraseFromParent();
  return true;
}

Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
  if (Op.isImm()) {
    return Op.getImm();
  }

  // If this is not an immediate then it can be a copy of an immediate value,
  // e.g.:
  // %vreg1<def> = S_MOV_B32 255;
  if (Op.isReg()) {
    for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
      if (!isSameReg(Op, Def))
        continue;

      const MachineInstr *DefInst = Def.getParent();
      if (!TII->isFoldableCopy(*DefInst))
        return None;

      const MachineOperand &Copied = DefInst->getOperand(1);
      if (!Copied.isImm())
        return None;

      return Copied.getImm();
    }
  }

  return None;
}

void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      unsigned Opcode = MI.getOpcode();
      switch (Opcode) {
      case AMDGPU::V_LSHRREV_B32_e32:
      case AMDGPU::V_ASHRREV_I32_e32:
      case AMDGPU::V_LSHLREV_B32_e32:
      case AMDGPU::V_LSHRREV_B32_e64:
      case AMDGPU::V_ASHRREV_I32_e64:
      case AMDGPU::V_LSHLREV_B32_e64: {
        // from: v_lshrrev_b32_e32 v1, 16/24, v0
        // to SDWA src:v0 src_sel:WORD_1/BYTE_3

        // from: v_ashrrev_i32_e32 v1, 16/24, v0
        // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1

        // from: v_lshlrev_b32_e32 v1, 16/24, v0
        // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        if (!Imm)
          break;

        if (*Imm != 16 && *Imm != 24)
          break;

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
            Opcode == AMDGPU::V_LSHLREV_B32_e64) {
          auto SDWADst = make_unique<SDWADstOperand>(
              Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
          SDWAOperands[&MI] = std::move(SDWADst);
          ++NumSDWAPatternsFound;
        } else {
          auto SDWASrc = make_unique<SDWASrcOperand>(
              Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
              Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
              Opcode != AMDGPU::V_LSHRREV_B32_e64);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
          SDWAOperands[&MI] = std::move(SDWASrc);
          ++NumSDWAPatternsFound;
        }
        break;
      }

      case AMDGPU::V_LSHRREV_B16_e32:
      case AMDGPU::V_ASHRREV_I16_e32:
      case AMDGPU::V_LSHLREV_B16_e32:
      case AMDGPU::V_LSHRREV_B16_e64:
      case AMDGPU::V_ASHRREV_I16_e64:
      case AMDGPU::V_LSHLREV_B16_e64: {
        // from: v_lshrrev_b16_e32 v1, 8, v0
        // to SDWA src:v0 src_sel:BYTE_1

        // from: v_ashrrev_i16_e32 v1, 8, v0
        // to SDWA src:v0 src_sel:BYTE_1 sext:1

        // from: v_lshlrev_b16_e32 v1, 8, v0
        // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        if (!Imm || *Imm != 8)
          break;

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
            Opcode == AMDGPU::V_LSHLREV_B16_e64) {
          auto SDWADst =
              make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
          SDWAOperands[&MI] = std::move(SDWADst);
          ++NumSDWAPatternsFound;
        } else {
          auto SDWASrc = make_unique<SDWASrcOperand>(
              Src1, Dst, BYTE_1, false, false,
              Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
              Opcode != AMDGPU::V_LSHRREV_B16_e64);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
          SDWAOperands[&MI] = std::move(SDWASrc);
          ++NumSDWAPatternsFound;
        }
        break;
      }

      case AMDGPU::V_BFE_I32:
      case AMDGPU::V_BFE_U32: {
        // e.g.:
        // from: v_bfe_u32 v1, v0, 8, 8
        // to SDWA src:v0 src_sel:BYTE_1

        // offset | width | src_sel
        // ------------------------
        // 0      | 8     | BYTE_0
        // 0      | 16    | WORD_0
        // 0      | 32    | DWORD ?
        // 8      | 8     | BYTE_1
        // 16     | 8     | BYTE_2
        // 16     | 16    | WORD_1
        // 24     | 8     | BYTE_3

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        auto Offset = foldToImm(*Src1);
        if (!Offset)
          break;

        MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        auto Width = foldToImm(*Src2);
        if (!Width)
          break;

        SdwaSel SrcSel = DWORD;

        if (*Offset == 0 && *Width == 8)
          SrcSel = BYTE_0;
        else if (*Offset == 0 && *Width == 16)
          SrcSel = WORD_0;
        else if (*Offset == 0 && *Width == 32)
          SrcSel = DWORD;
        else if (*Offset == 8 && *Width == 8)
          SrcSel = BYTE_1;
        else if (*Offset == 16 && *Width == 8)
          SrcSel = BYTE_2;
        else if (*Offset == 16 && *Width == 16)
          SrcSel = WORD_1;
        else if (*Offset == 24 && *Width == 8)
          SrcSel = BYTE_3;
        else
          break;

        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src0->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        auto SDWASrc = make_unique<SDWASrcOperand>(
            Src0, Dst, SrcSel, false, false,
            Opcode != AMDGPU::V_BFE_U32);
        DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
        SDWAOperands[&MI] = std::move(SDWASrc);
        ++NumSDWAPatternsFound;
        break;
      }
      case AMDGPU::V_AND_B32_e32:
      case AMDGPU::V_AND_B32_e64: {
        // e.g.:
        // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
        // to SDWA src:v0 src_sel:WORD_0/BYTE_0

        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        auto ValSrc = Src1;
        auto Imm = foldToImm(*Src0);

        if (!Imm) {
          Imm = foldToImm(*Src1);
          ValSrc = Src0;
        }

        if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
          break;

        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        auto SDWASrc = make_unique<SDWASrcOperand>(
            ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
        DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
        SDWAOperands[&MI] = std::move(SDWASrc);
        ++NumSDWAPatternsFound;
        break;
      }
      }
    }
  }
}

bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
                                         const SISubtarget &ST) const {
  // Check if this instruction has an opcode that supports SDWA.
  int Opc = MI.getOpcode();
  if (AMDGPU::getSDWAOp(Opc) == -1)
    Opc = AMDGPU::getVOPe32(Opc);
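  // No SDWA form for this opcode directly; see whether its VOP2 (e32)
  // equivalent, if any, has one.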

  if (Opc == -1 || AMDGPU::getSDWAOp(Opc) == -1)
    return false;

  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
    return false;

  if (TII->isVOPC(Opc)) {
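    // Without SDWA sdst support the compare result can only be written to VCC.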
    if (!ST.hasSDWASdst()) {
      const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
      if (SDst && SDst->getReg() != AMDGPU::VCC)
        return false;
    }

    if (!ST.hasSDWAOutModsVOPC() &&
        (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
         TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
      return false;

  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
             !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
    return false;
  }

  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
                           Opc == AMDGPU::V_MAC_F32_e32))
    return false;

  return true;
}

bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                   const SDWAOperandsVector &SDWAOperands) {
  // Convert to sdwa
  int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
  if (SDWAOpcode == -1)
    SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
  assert(SDWAOpcode != -1);

  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);

  // Create SDWA version of instruction MI and initialize its operands
  MachineInstrBuilder SDWAInst =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);

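  // Operands are appended below in the order the SDWA opcode expects:
  // vdst/sdst, src0_modifiers, src0, src1_modifiers, src1, src2 (v_mac only),
  // clamp, omod, dst_sel, dst_unused, src0_sel and src1_sel, skipping any
  // operand this particular SDWA opcode does not have.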
  // Copy dst; if it is present in the original then it should also be present
  // in the SDWA form.
  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  if (Dst) {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
    SDWAInst.add(*Dst);
  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
    assert(Dst &&
           AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
    SDWAInst.add(*Dst);
  } else {
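    // The original VOPC instruction has neither vdst nor sdst: its result goes
    // to the implicit VCC, so give the SDWA form an explicit VCC definition.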
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
    SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
  }

  // Copy src0 and initialize src0_modifiers. All SDWA instructions have src0
  // and src0_modifiers (except for v_nop_sdwa, but it can't get here).
  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  assert(
    Src0 &&
    AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
    AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
    SDWAInst.addImm(Mod->getImm());
  else
    SDWAInst.addImm(0);
  SDWAInst.add(*Src0);

  // Copy src1 if present, initialize src1_modifiers.
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  if (Src1) {
    assert(
      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
    if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
      SDWAInst.addImm(Mod->getImm());
    else
      SDWAInst.addImm(0);
    SDWAInst.add(*Src1);
  }

  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
    // v_mac_f16/32 has additional src2 operand tied to vdst
    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    assert(Src2);
    SDWAInst.add(*Src2);
  }

  // Copy clamp if present, initialize otherwise
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
  MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
  if (Clamp) {
    SDWAInst.add(*Clamp);
  } else {
    SDWAInst.addImm(0);
  }

  // Copy omod if present, initialize otherwise if needed
  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
    MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
    if (OMod) {
      SDWAInst.add(*OMod);
    } else {
      SDWAInst.addImm(0);
    }
  }

  // Initialize dst_sel if present
  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
  }

  // Initialize dst_unused if present
  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
    SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
  }

  // Initialize src0_sel
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
  SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

  // Initialize src1_sel if present
  if (Src1) {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
  }

  // Apply all SDWA operand patterns.
  bool Converted = false;
  for (auto &Operand : SDWAOperands) {
    // There should be no intersection between SDWA operands and potential MIs,
    // e.g.:
    // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
    // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
    // v_add_u32 v3, v4, v2
    //
    // In that example it is possible that we would fold the 2nd instruction
    // into the 3rd (v_add_u32_sdwa) and then try to fold the 1st instruction
    // into the 2nd (which was already destroyed). So if an SDWA operand is
    // also a potential MI then do not apply it.
    if (PotentialMatches.count(Operand->getParentInst()) == 0)
      Converted |= Operand->convertToSDWA(*SDWAInst, TII);
  }
  if (Converted) {
    ConvertedInstructions.push_back(SDWAInst);
  } else {
    SDWAInst->eraseFromParent();
    return false;
  }

  DEBUG(dbgs() << "Convert instruction:" << MI
               << "Into:" << *SDWAInst << '\n');
  ++NumSDWAInstructionsPeepholed;

  MI.eraseFromParent();
  return true;
}

// If an instruction was converted to SDWA it should not have immediates or SGPR
// operands (one SGPR is allowed on GFX9). Copy its scalar operands into VGPRs.
void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const {
  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
  unsigned ConstantBusCount = 0;
  for (MachineOperand &Op: MI.explicit_uses()) {
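    // Only immediates and non-VGPR register operands need to be legalized;
    // anything already in a VGPR is left alone.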
    if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
      continue;

    unsigned I = MI.getOperandNo(&Op);
    if (Desc.OpInfo[I].RegClass == -1 ||
        !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
      continue;

    if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
        TRI->isSGPRReg(*MRI, Op.getReg())) {
      ++ConstantBusCount;
      continue;
    }

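    // Materialize the immediate or SGPR into a fresh VGPR right before MI and
    // rewrite the operand to use it.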
    unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                        TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
    if (Op.isImm())
      Copy.addImm(Op.getImm());
    else if (Op.isReg())
      Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
                  Op.getSubReg());
    Op.ChangeToRegister(VGPR, false);
  }
}

bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  if (!ST.hasSDWA() || skipFunction(*MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  // Find all SDWA operands in MF.
  matchSDWAOperands(MF);

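  // Group the matched operands by the instruction they could be folded into,
  // so that several operands targeting the same instruction are converted
  // together.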
  for (const auto &OperandPair : SDWAOperands) {
    const auto &Operand = OperandPair.second;
    MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
    if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
      PotentialMatches[PotentialMI].push_back(Operand.get());
    }
  }

  for (auto &PotentialPair : PotentialMatches) {
    MachineInstr &PotentialMI = *PotentialPair.first;
    convertToSDWA(PotentialMI, PotentialPair.second);
  }

  PotentialMatches.clear();
  SDWAOperands.clear();

  bool Ret = !ConvertedInstructions.empty();
  while (!ConvertedInstructions.empty())
    legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);

  return Ret;
}