blob: d2844c39430cd7b748ad19a240c019b20613b225 [file] [log] [blame]
Sam Koltonf60ad582017-03-21 12:51:34 +00001//===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file This pass tries to apply several peephole SDWA patterns.
11///
12/// E.g. original:
13/// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
14/// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
15/// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
16///
17/// Replace:
18/// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
19/// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
20///
21//===----------------------------------------------------------------------===//
22
23
24#include "AMDGPU.h"
25#include "AMDGPUSubtarget.h"
26#include "SIDefines.h"
27#include "SIInstrInfo.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/STLExtras.h"
30#include "llvm/CodeGen/MachineFunctionPass.h"
31#include "llvm/CodeGen/MachineInstrBuilder.h"
32#include <unordered_map>
33
34using namespace llvm;
35
36#define DEBUG_TYPE "si-peephole-sdwa"
37
38STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
39STATISTIC(NumSDWAInstructionsPeepholed,
40 "Number of instruction converted to SDWA.");
41
42namespace {
43
44class SDWAOperand;
45
46class SIPeepholeSDWA : public MachineFunctionPass {
47private:
48 MachineRegisterInfo *MRI;
49 const SIRegisterInfo *TRI;
50 const SIInstrInfo *TII;
51
52 std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
53
54public:
55 static char ID;
56
57 typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector;
58
59 SIPeepholeSDWA() : MachineFunctionPass(ID) {
60 initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
61 }
62
63 bool runOnMachineFunction(MachineFunction &MF) override;
64 void matchSDWAOperands(MachineBasicBlock &MBB);
65 bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
66
67 StringRef getPassName() const override { return "SI Peephole SDWA"; }
68
69 void getAnalysisUsage(AnalysisUsage &AU) const override {
70 AU.setPreservesCFG();
71 MachineFunctionPass::getAnalysisUsage(AU);
72 }
73};
74
75class SDWAOperand {
76private:
77 MachineOperand *Target; // Operand that would be used in converted instruction
78 MachineOperand *Replaced; // Operand that would be replace by Target
79
80public:
81 SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
82 : Target(TargetOp), Replaced(ReplacedOp) {
83 assert(Target->isReg());
84 assert(Replaced->isReg());
85 }
86
87 virtual ~SDWAOperand() {}
88
89 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
90 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
91
92 MachineOperand *getTargetOperand() const { return Target; }
93 MachineOperand *getReplacedOperand() const { return Replaced; }
94 MachineInstr *getParentInst() const { return Target->getParent(); }
95 MachineRegisterInfo *getMRI() const {
96 return &getParentInst()->getParent()->getParent()->getRegInfo();
97 }
98};
99
100using namespace AMDGPU::SDWA;
101
102class SDWASrcOperand : public SDWAOperand {
103private:
104 SdwaSel SrcSel;
105 bool Abs;
106 bool Neg;
107 bool Sext;
108
109public:
110 SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
111 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
112 bool Sext_ = false)
113 : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
114 Neg(Neg_), Sext(Sext_) {}
115
116 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
117 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
118
119 SdwaSel getSrcSel() const { return SrcSel; }
120 bool getAbs() const { return Abs; }
121 bool getNeg() const { return Neg; }
122 bool getSext() const { return Sext; }
123
124 uint64_t getSrcMods() const;
125};
126
127class SDWADstOperand : public SDWAOperand {
128private:
129 SdwaSel DstSel;
130 DstUnused DstUn;
131
132public:
133 SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
134 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
135 : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
136
137 virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
138 virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
139
140 SdwaSel getDstSel() const { return DstSel; }
141 DstUnused getDstUnused() const { return DstUn; }
142};
143
144} // End anonymous namespace.
145
146INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
147
148char SIPeepholeSDWA::ID = 0;
149
150char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
151
152FunctionPass *llvm::createSIPeepholeSDWAPass() {
153 return new SIPeepholeSDWA();
154}
155
156#ifndef NDEBUG
157
158static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
159 switch(Sel) {
160 case BYTE_0: OS << "BYTE_0"; break;
161 case BYTE_1: OS << "BYTE_1"; break;
162 case BYTE_2: OS << "BYTE_2"; break;
163 case BYTE_3: OS << "BYTE_3"; break;
164 case WORD_0: OS << "WORD_0"; break;
165 case WORD_1: OS << "WORD_1"; break;
166 case DWORD: OS << "DWORD"; break;
167 }
168 return OS;
169}
170
171static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
172 switch(Un) {
173 case UNUSED_PAD: OS << "UNUSED_PAD"; break;
174 case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
175 case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
176 }
177 return OS;
178}
179
180static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) {
181 OS << "SDWA src: " << *Src.getTargetOperand()
182 << " src_sel:" << Src.getSrcSel()
183 << " abs:" << Src.getAbs() << " neg:" << Src.getNeg()
184 << " sext:" << Src.getSext() << '\n';
185 return OS;
186}
187
188static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
189 OS << "SDWA dst: " << *Dst.getTargetOperand()
190 << " dst_sel:" << Dst.getDstSel()
191 << " dst_unused:" << Dst.getDstUnused() << '\n';
192 return OS;
193}
194
195#endif
196
197static bool isSameBB(const MachineInstr *FirstMI, const MachineInstr *SecondMI) {
198 assert(FirstMI && SecondMI);
199 return FirstMI->getParent() == SecondMI->getParent();
200}
201
202static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
203 assert(To.isReg() && From.isReg());
204 To.setReg(From.getReg());
205 To.setSubReg(From.getSubReg());
206 To.setIsUndef(From.isUndef());
207 if (To.isUse()) {
208 To.setIsKill(From.isKill());
209 } else {
210 To.setIsDead(From.isDead());
211 }
212}
213
214static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
215 return LHS.isReg() &&
216 RHS.isReg() &&
217 LHS.getReg() == RHS.getReg() &&
218 LHS.getSubReg() == RHS.getSubReg();
219}
220
221static bool isSubregOf(const MachineOperand &SubReg,
222 const MachineOperand &SuperReg,
223 const TargetRegisterInfo *TRI) {
224
225 if (!SuperReg.isReg() || !SubReg.isReg())
226 return false;
227
228 if (isSameReg(SuperReg, SubReg))
229 return true;
230
231 if (SuperReg.getReg() != SubReg.getReg())
232 return false;
233
234 LaneBitmask::Type SuperMask =
235 TRI->getSubRegIndexLaneMask(SuperReg.getSubReg()).getAsInteger();
236 LaneBitmask::Type SubMask =
237 TRI->getSubRegIndexLaneMask(SubReg.getSubReg()).getAsInteger();
238 return TRI->regmaskSubsetEqual(&SubMask, &SuperMask);
239}
240
241uint64_t SDWASrcOperand::getSrcMods() const {
242 uint64_t Mods = 0;
243 if (Abs || Neg) {
244 assert(!Sext &&
245 "Float and integer src modifiers can't be set simulteniously");
246 Mods |= Abs ? SISrcMods::ABS : 0;
247 Mods |= Neg ? SISrcMods::NEG : 0;
248 } else if (Sext) {
249 Mods |= SISrcMods::SEXT;
250 }
251
252 return Mods;
253}
254
255MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
256 // For SDWA src operand potential instruction is one that use register
257 // defined by parent instruction
258 MachineRegisterInfo *MRI = getMRI();
259 MachineOperand *Replaced = getReplacedOperand();
260 assert(Replaced->isReg());
261
262 MachineInstr *PotentialMI = nullptr;
263 for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) {
264 // If this is use of another subreg of dst reg then do nothing
265 if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
266 continue;
267
268 // If there exist use of dst in another basic block or use of superreg of
269 // dst then we should not combine this opernad
270 if (!isSameBB(PotentialMO.getParent(), getParentInst()) ||
271 !isSameReg(PotentialMO, *Replaced))
272 return nullptr;
273
274 // Check that PotentialMI is only instruction that uses dst reg
275 if (PotentialMI == nullptr) {
276 PotentialMI = PotentialMO.getParent();
277 } else if (PotentialMI != PotentialMO.getParent()) {
278 return nullptr;
279 }
280 }
281
282 return PotentialMI;
283}
284
285bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
286 // Find operand in instruction that matches source operand and replace it with
287 // target operand. Set corresponding src_sel
288
289 MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
290 MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
291 MachineOperand *SrcMods =
292 TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
293 assert(Src && Src->isReg());
294 if (!isSameReg(*Src, *getReplacedOperand())) {
295 // If this is not src0 then it should be src1
296 Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
297 SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
298 SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
299
300 assert(Src && Src->isReg());
301
302 if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
303 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
304 !isSameReg(*Src, *getReplacedOperand())) {
305 // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
306 // src2. This is not allowed.
307 return false;
308 }
309
310 assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
311 }
312 copyRegOperand(*Src, *getTargetOperand());
313 SrcSel->setImm(getSrcSel());
314 SrcMods->setImm(getSrcMods());
315 getTargetOperand()->setIsKill(false);
316 return true;
317}
318
319MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
320 // For SDWA dst operand potential instruction is one that defines register
321 // that this operand uses
322 MachineRegisterInfo *MRI = getMRI();
323 MachineInstr *ParentMI = getParentInst();
324 MachineOperand *Replaced = getReplacedOperand();
325 assert(Replaced->isReg());
326
327 for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) {
328 if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
329 continue;
330
331 if (!isSameBB(getParentInst(), PotentialMO.getParent()) ||
332 !isSameReg(*Replaced, PotentialMO))
333 return nullptr;
334
335 // Check that ParentMI is the only instruction that uses replaced register
336 for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) {
337 if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) &&
338 UseMO.getParent() != ParentMI) {
339 return nullptr;
340 }
341 }
342
343 // Due to SSA this should be onle def of replaced register, so return it
344 return PotentialMO.getParent();
345 }
346
347 return nullptr;
348}
349
350bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
351 // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
352
353 if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
354 MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
355 getDstSel() != AMDGPU::SDWA::DWORD) {
356 // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD
357 return false;
358 }
359
360 MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
361 assert(Operand &&
362 Operand->isReg() &&
363 isSameReg(*Operand, *getReplacedOperand()));
364 copyRegOperand(*Operand, *getTargetOperand());
365 MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
366 assert(DstSel);
367 DstSel->setImm(getDstSel());
368 MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
369 assert(DstUnused);
370 DstUnused->setImm(getDstUnused());
371
372 // Remove original instruction because it would conflict with our new
373 // instruction by register definition
374 getParentInst()->eraseFromParent();
375 return true;
376}
377
378void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
379 for (MachineInstr &MI : MBB) {
380 unsigned Opcode = MI.getOpcode();
381 switch (Opcode) {
382 case AMDGPU::V_LSHRREV_B32_e32:
383 case AMDGPU::V_ASHRREV_I32_e32:
384 case AMDGPU::V_LSHLREV_B32_e32: {
385 // from: v_lshrrev_b32_e32 v1, 16/24, v0
386 // to SDWA src:v0 src_sel:WORD_1/BYTE_3
387
388 // from: v_ashrrev_i32_e32 v1, 16/24, v0
389 // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
390
391 // from: v_lshlrev_b32_e32 v1, 16/24, v0
392 // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
393 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
394 if (!Src0->isImm())
395 break;
396
397 int64_t Imm = Src0->getImm();
398 if (Imm != 16 && Imm != 24)
399 break;
400
401 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
402 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
403 if (TRI->isPhysicalRegister(Src1->getReg()) ||
404 TRI->isPhysicalRegister(Dst->getReg()))
405 break;
406
407 if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
408 auto SDWADst = make_unique<SDWADstOperand>(
409 Dst, Src1, Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
410 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
411 SDWAOperands[&MI] = std::move(SDWADst);
412 ++NumSDWAPatternsFound;
413 } else {
414 auto SDWASrc = make_unique<SDWASrcOperand>(
415 Src1, Dst, Imm == 16 ? WORD_1 : BYTE_3, false, false,
416 Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
417 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
418 SDWAOperands[&MI] = std::move(SDWASrc);
419 ++NumSDWAPatternsFound;
420 }
421 break;
422 }
423
424 case AMDGPU::V_LSHRREV_B16_e32:
425 case AMDGPU::V_ASHRREV_I16_e32:
426 case AMDGPU::V_LSHLREV_B16_e32: {
427 // from: v_lshrrev_b16_e32 v1, 8, v0
428 // to SDWA src:v0 src_sel:BYTE_1
429
430 // from: v_ashrrev_i16_e32 v1, 8, v0
431 // to SDWA src:v0 src_sel:BYTE_1 sext:1
432
433 // from: v_lshlrev_b16_e32 v1, 8, v0
434 // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
435 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
436 if (!Src0->isImm() || Src0->getImm() != 8)
437 break;
438
439 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
440 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
441
442 if (TRI->isPhysicalRegister(Src1->getReg()) ||
443 TRI->isPhysicalRegister(Dst->getReg()))
444 break;
445
446 if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
447 auto SDWADst =
448 make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
449 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
450 SDWAOperands[&MI] = std::move(SDWADst);
451 ++NumSDWAPatternsFound;
452 } else {
453 auto SDWASrc = make_unique<SDWASrcOperand>(
454 Src1, Dst, BYTE_1, false, false,
455 Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
456 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
457 SDWAOperands[&MI] = std::move(SDWASrc);
458 ++NumSDWAPatternsFound;
459 }
460 break;
461 }
462
463 case AMDGPU::V_BFE_I32:
464 case AMDGPU::V_BFE_U32: {
465 // e.g.:
466 // from: v_bfe_u32 v1, v0, 8, 8
467 // to SDWA src:v0 src_sel:BYTE_1
468
469 // offset | width | src_sel
470 // ------------------------
471 // 0 | 8 | BYTE_0
472 // 0 | 16 | WORD_0
473 // 0 | 32 | DWORD ?
474 // 8 | 8 | BYTE_1
475 // 16 | 8 | BYTE_2
476 // 16 | 16 | WORD_1
477 // 24 | 8 | BYTE_3
478
479 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
480 if (!Src1->isImm())
481 break;
482 int64_t Offset = Src1->getImm();
483
484 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
485 if (!Src2->isImm())
486 break;
487 int64_t Width = Src2->getImm();
488
489 SdwaSel SrcSel = DWORD;
490
491 if (Offset == 0 && Width == 8)
492 SrcSel = BYTE_0;
493 else if (Offset == 0 && Width == 16)
494 SrcSel = WORD_0;
495 else if (Offset == 0 && Width == 32)
496 SrcSel = DWORD;
497 else if (Offset == 8 && Width == 8)
498 SrcSel = BYTE_1;
499 else if (Offset == 16 && Width == 8)
500 SrcSel = BYTE_2;
501 else if (Offset == 16 && Width == 16)
502 SrcSel = WORD_1;
503 else if (Offset == 24 && Width == 8)
504 SrcSel = BYTE_3;
505 else
506 break;
507
508 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
509 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
510
511 if (TRI->isPhysicalRegister(Src0->getReg()) ||
512 TRI->isPhysicalRegister(Dst->getReg()))
513 break;
514
515 auto SDWASrc = make_unique<SDWASrcOperand>(
516 Src0, Dst, SrcSel, false, false,
517 Opcode == AMDGPU::V_BFE_U32 ? false : true);
518 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
519 SDWAOperands[&MI] = std::move(SDWASrc);
520 ++NumSDWAPatternsFound;
521 break;
522 }
523 case AMDGPU::V_AND_B32_e32: {
524 // e.g.:
525 // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
526 // to SDWA src:v0 src_sel:WORD_0/BYTE_0
527
528 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
529 if (!Src0->isImm())
530 break;
531
532 int64_t Imm = Src0->getImm();
533 if (Imm != 0x0000ffff && Imm != 0x000000ff)
534 break;
535
536 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
537 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
538
539 if (TRI->isPhysicalRegister(Src1->getReg()) ||
540 TRI->isPhysicalRegister(Dst->getReg()))
541 break;
542
543 auto SDWASrc = make_unique<SDWASrcOperand>(
544 Src1, Dst, Imm == 0x0000ffff ? WORD_0 : BYTE_0);
545 DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
546 SDWAOperands[&MI] = std::move(SDWASrc);
547 ++NumSDWAPatternsFound;
548 break;
549 }
550 }
551 }
552}
553
554bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
555 const SDWAOperandsVector &SDWAOperands) {
556 // Check if this instruction can be converted to SDWA:
557 // 1. Does this opcode support SDWA
558 if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
559 return false;
560
561 // 2. Are all operands - VGPRs
562 for (const MachineOperand &Operand : MI.explicit_operands()) {
563 if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
564 return false;
565 }
566
567 // Convert to sdwa
568 int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
569 assert(SDWAOpcode != -1);
570
571 const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
572
573 // Create SDWA version of instruction MI and initialize its operands
574 MachineInstrBuilder SDWAInst =
575 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
576
577 // Copy dst, if it is present in original then should also be present in SDWA
578 MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
579 if (Dst) {
580 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
581 SDWAInst.add(*Dst);
582 } else {
583 assert(TII->isVOPC(MI));
584 }
585
586 // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
587 // src0_modifiers (except for v_nop_sdwa, but it can't get here)
588 MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
589 assert(
590 Src0 &&
591 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
592 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
593 SDWAInst.addImm(0);
594 SDWAInst.add(*Src0);
595
596 // Copy src1 if present, initialize src1_modifiers.
597 MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
598 if (Src1) {
599 assert(
600 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
601 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
602 SDWAInst.addImm(0);
603 SDWAInst.add(*Src1);
604 } else {
605 assert(TII->isVOP1(MI));
606 }
607
608 if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
609 SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
610 // v_mac_f16/32 has additional src2 operand tied to vdst
611 MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
612 assert(Src2);
613 SDWAInst.add(*Src2);
614 }
615
616 // Initialize clamp.
617 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
618 SDWAInst.addImm(0);
619
620 // Initialize dst_sel and dst_unused if present
621 if (Dst) {
622 assert(
623 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
624 AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
625 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
626 SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
627 }
628
629 // Initialize src0_sel
630 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
631 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
632
633
634 // Initialize src1_sel if present
635 if (Src1) {
636 assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
637 SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
638 }
639
640 // Apply all sdwa operand pattenrs
641 bool Converted = false;
642 for (auto &Operand : SDWAOperands) {
643 Converted |= Operand->convertToSDWA(*SDWAInst, TII);
644 }
645 if (!Converted) {
646 SDWAInst->eraseFromParent();
647 return false;
648 }
649
650 DEBUG(dbgs() << "Convert instruction:" << MI
651 << "Into:" << *SDWAInst << '\n');
652 ++NumSDWAInstructionsPeepholed;
653
654 MI.eraseFromParent();
655 return true;
656}
657
658bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
659 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
660
661 if (!ST.hasSDWA() ||
662 !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9
663 return false;
664 }
665
666 MRI = &MF.getRegInfo();
667 TRI = ST.getRegisterInfo();
668 TII = ST.getInstrInfo();
669
670 std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
671
672 // FIXME: For now we only combine instructions in one basic block
673 for (MachineBasicBlock &MBB : MF) {
674 SDWAOperands.clear();
675 matchSDWAOperands(MBB);
676
677 PotentialMatches.clear();
678 for (auto &OperandPair : SDWAOperands) {
679 auto &Operand = OperandPair.second;
680 MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
681 if (PotentialMI) {
682 PotentialMatches[PotentialMI].push_back(std::move(Operand));
683 }
684 }
685
686 for (auto &PotentialPair : PotentialMatches) {
687 MachineInstr &PotentialMI = *PotentialPair.first;
688 convertToSDWA(PotentialMI, PotentialPair.second);
689 }
690 }
691 return false;
692}