//===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass tries to apply several peephole SDWA patterns.
///
/// E.g. original:
///   V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
///   V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
///   V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
///
/// Replace:
///   V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//


#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include <unordered_map>

using namespace llvm;

#define DEBUG_TYPE "si-peephole-sdwa"

STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
STATISTIC(NumSDWAInstructionsPeepholed,
          "Number of instructions converted to SDWA.");

namespace {

class SDWAOperand;

class SIPeepholeSDWA : public MachineFunctionPass {
private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const SIInstrInfo *TII;

  std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;

  Optional<int64_t> foldToImm(const MachineOperand &Op) const;

public:
  static char ID;

  typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector;

  SIPeepholeSDWA() : MachineFunctionPass(ID) {
    initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void matchSDWAOperands(MachineFunction &MF);
  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);

  StringRef getPassName() const override { return "SI Peephole SDWA"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

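// Base class for one matched SDWA pattern. potentialToConvert() finds the
// single instruction that could be rewritten into SDWA form using this
// operand, and convertToSDWA() patches the already-built SDWA instruction
// accordingly.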
class SDWAOperand {
private:
  MachineOperand *Target; // Operand that would be used in converted instruction
  MachineOperand *Replaced; // Operand that would be replaced by Target

public:
  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    assert(Target->isReg());
    assert(Replaced->isReg());
  }

  virtual ~SDWAOperand() {}

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;

  MachineOperand *getTargetOperand() const { return Target; }
  MachineOperand *getReplacedOperand() const { return Replaced; }
  MachineInstr *getParentInst() const { return Target->getParent(); }
  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
  }
};

using namespace AMDGPU::SDWA;

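// Describes a value that can feed an SDWA instruction as a narrowed source:
// which byte/word of the register to read (SrcSel) plus optional abs/neg
// (float) or sext (integer) source modifiers.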
class SDWASrcOperand : public SDWAOperand {
private:
  SdwaSel SrcSel;
  bool Abs;
  bool Neg;
  bool Sext;

public:
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
                 bool Sext_ = false)
      : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
        Neg(Neg_), Sext(Sext_) {}

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }

  uint64_t getSrcMods() const;
};

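// Describes where a narrowed result should be written: which byte/word of the
// destination register is updated (DstSel) and how the unused bits are
// treated (DstUn).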
class SDWADstOperand : public SDWAOperand {
private:
  SdwaSel DstSel;
  DstUnused DstUn;

public:
  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)

char SIPeepholeSDWA::ID = 0;

char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;

FunctionPass *llvm::createSIPeepholeSDWAPass() {
  return new SIPeepholeSDWA();
}

#ifndef NDEBUG

static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
  switch(Sel) {
  case BYTE_0: OS << "BYTE_0"; break;
  case BYTE_1: OS << "BYTE_1"; break;
  case BYTE_2: OS << "BYTE_2"; break;
  case BYTE_3: OS << "BYTE_3"; break;
  case WORD_0: OS << "WORD_0"; break;
  case WORD_1: OS << "WORD_1"; break;
  case DWORD: OS << "DWORD"; break;
  }
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
  switch(Un) {
  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
  }
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) {
  OS << "SDWA src: " << *Src.getTargetOperand()
     << " src_sel:" << Src.getSrcSel()
     << " abs:" << Src.getAbs() << " neg:" << Src.getNeg()
     << " sext:" << Src.getSext() << '\n';
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
  OS << "SDWA dst: " << *Dst.getTargetOperand()
     << " dst_sel:" << Dst.getDstSel()
     << " dst_unused:" << Dst.getDstUnused() << '\n';
  return OS;
}

#endif

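// Copy register, subregister and the flags relevant to the operand's role:
// undef always, kill for uses, dead for defs.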
static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
  assert(To.isReg() && From.isReg());
  To.setReg(From.getReg());
  To.setSubReg(From.getSubReg());
  To.setIsUndef(From.isUndef());
  if (To.isUse()) {
    To.setIsKill(From.isKill());
  } else {
    To.setIsDead(From.isDead());
  }
}

static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
  return LHS.isReg() &&
         RHS.isReg() &&
         LHS.getReg() == RHS.getReg() &&
         LHS.getSubReg() == RHS.getSubReg();
}

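// Returns true if SubReg accesses a (not necessarily proper) subset of the
// lanes accessed by SuperReg on the same register, determined by comparing
// the lane masks of the two subregister indices.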
static bool isSubregOf(const MachineOperand &SubReg,
                       const MachineOperand &SuperReg,
                       const TargetRegisterInfo *TRI) {

  if (!SuperReg.isReg() || !SubReg.isReg())
    return false;

  if (isSameReg(SuperReg, SubReg))
    return true;

  if (SuperReg.getReg() != SubReg.getReg())
    return false;

  LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg());
  LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg());
  SuperMask |= ~SubMask;
  return SuperMask.all();
}

uint64_t SDWASrcOperand::getSrcMods() const {
  uint64_t Mods = 0;
  if (Abs || Neg) {
    assert(!Sext &&
           "Float and integer src modifiers can't be set simultaneously");
    Mods |= Abs ? SISrcMods::ABS : 0;
    Mods |= Neg ? SISrcMods::NEG : 0;
  } else if (Sext) {
    Mods |= SISrcMods::SEXT;
  }

  return Mods;
}

MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA src operand, the potential instruction is the one that uses
  // the register defined by the parent instruction.
  MachineRegisterInfo *MRI = getMRI();
  MachineOperand *Replaced = getReplacedOperand();
  assert(Replaced->isReg());

  MachineInstr *PotentialMI = nullptr;
  for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) {
    // If this is a use of another subreg of the dst reg then do nothing.
    if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
      continue;

    // If there exists a use of a superreg of dst then we should not combine
    // this operand.
    if (!isSameReg(PotentialMO, *Replaced))
      return nullptr;

    // Check that PotentialMI is the only instruction that uses the dst reg.
    if (PotentialMI == nullptr) {
      PotentialMI = PotentialMO.getParent();
    } else if (PotentialMI != PotentialMO.getParent()) {
      return nullptr;
    }
  }

  return PotentialMI;
}

bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Find the operand in MI that matches the replaced operand and rewrite it to
  // the target operand. Set the corresponding src_sel and src_modifiers.

  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && Src->isReg());
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it should be src1.
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

    assert(Src && Src->isReg());

    if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
         MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
        !isSameReg(*Src, *getReplacedOperand())) {
      // In case of v_mac_f16/32_sdwa this pass can try to apply the src
      // operand to src2. This is not allowed.
      return false;
    }

    assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
  }
  copyRegOperand(*Src, *getTargetOperand());
  SrcSel->setImm(getSrcSel());
  SrcMods->setImm(getSrcMods());
  getTargetOperand()->setIsKill(false);
  return true;
}

MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA dst operand, the potential instruction is the one that defines
  // the register that this operand uses.
  MachineRegisterInfo *MRI = getMRI();
  MachineInstr *ParentMI = getParentInst();
  MachineOperand *Replaced = getReplacedOperand();
  assert(Replaced->isReg());

  for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) {
    if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
      continue;

    if (!isSameReg(*Replaced, PotentialMO))
      return nullptr;

    // Check that ParentMI is the only instruction that uses the replaced
    // register.
    for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) {
      if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) &&
          UseMO.getParent() != ParentMI) {
        return nullptr;
      }
    }

    // Due to SSA this should be the only def of the replaced register, so
    // return it.
    return PotentialMO.getParent();
  }

  return nullptr;
}

bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Replace the vdst operand in MI with the target operand. Set dst_sel and
  // dst_unused.

  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      getDstSel() != AMDGPU::SDWA::DWORD) {
    // v_mac_f16/32_sdwa allows dst_sel to be equal only to DWORD.
    return false;
  }

  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  assert(Operand &&
         Operand->isReg() &&
         isSameReg(*Operand, *getReplacedOperand()));
  copyRegOperand(*Operand, *getTargetOperand());
  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
  assert(DstSel);
  DstSel->setImm(getDstSel());
  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
  assert(DstUnused);
  DstUnused->setImm(getDstUnused());

  // Remove the original instruction because its register definition would
  // conflict with our new instruction.
  getParentInst()->eraseFromParent();
  return true;
}

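// If Op is an immediate, return it. If it is a register defined by a foldable
// copy of an immediate, return the copied value. Otherwise return None.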
Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
  if (Op.isImm()) {
    return Op.getImm();
  }

  // If this is not an immediate then it can be a copy of an immediate value,
  // e.g.:
  // %vreg1<def> = S_MOV_B32 255;
  if (Op.isReg()) {
    for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
      if (!isSameReg(Op, Def))
        continue;

      const MachineInstr *DefInst = Def.getParent();
      if (!TII->isFoldableCopy(*DefInst))
        return None;

      const MachineOperand &Copied = DefInst->getOperand(1);
      if (!Copied.isImm())
        return None;

      return Copied.getImm();
    }
  }

  return None;
}

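// Scan every instruction in the function and record shift, BFE and AND
// patterns that can be expressed as an SDWA source or destination selection.
// Matches are keyed by the matched instruction in the SDWAOperands map.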
void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      unsigned Opcode = MI.getOpcode();
      switch (Opcode) {
      case AMDGPU::V_LSHRREV_B32_e32:
      case AMDGPU::V_ASHRREV_I32_e32:
      case AMDGPU::V_LSHLREV_B32_e32: {
        // from: v_lshrrev_b32_e32 v1, 16/24, v0
        // to SDWA src:v0 src_sel:WORD_1/BYTE_3

        // from: v_ashrrev_i32_e32 v1, 16/24, v0
        // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1

        // from: v_lshlrev_b32_e32 v1, 16/24, v0
        // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        if (!Imm)
          break;

        if (*Imm != 16 && *Imm != 24)
          break;

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
          auto SDWADst = make_unique<SDWADstOperand>(
              Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
          SDWAOperands[&MI] = std::move(SDWADst);
          ++NumSDWAPatternsFound;
        } else {
          auto SDWASrc = make_unique<SDWASrcOperand>(
              Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
              Opcode == AMDGPU::V_LSHRREV_B32_e32 ? false : true);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
          SDWAOperands[&MI] = std::move(SDWASrc);
          ++NumSDWAPatternsFound;
        }
        break;
      }

      case AMDGPU::V_LSHRREV_B16_e32:
      case AMDGPU::V_ASHRREV_I16_e32:
      case AMDGPU::V_LSHLREV_B16_e32: {
        // from: v_lshrrev_b16_e32 v1, 8, v0
        // to SDWA src:v0 src_sel:BYTE_1

        // from: v_ashrrev_i16_e32 v1, 8, v0
        // to SDWA src:v0 src_sel:BYTE_1 sext:1

        // from: v_lshlrev_b16_e32 v1, 8, v0
        // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        if (!Imm || *Imm != 8)
          break;

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
          auto SDWADst =
              make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
          SDWAOperands[&MI] = std::move(SDWADst);
          ++NumSDWAPatternsFound;
        } else {
          auto SDWASrc = make_unique<SDWASrcOperand>(
              Src1, Dst, BYTE_1, false, false,
              Opcode == AMDGPU::V_LSHRREV_B16_e32 ? false : true);
          DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
          SDWAOperands[&MI] = std::move(SDWASrc);
          ++NumSDWAPatternsFound;
        }
        break;
      }

      case AMDGPU::V_BFE_I32:
      case AMDGPU::V_BFE_U32: {
        // e.g.:
        // from: v_bfe_u32 v1, v0, 8, 8
        // to SDWA src:v0 src_sel:BYTE_1

        // offset | width | src_sel
        // ------------------------
        // 0      | 8     | BYTE_0
        // 0      | 16    | WORD_0
        // 0      | 32    | DWORD ?
        // 8      | 8     | BYTE_1
        // 16     | 8     | BYTE_2
        // 16     | 16    | WORD_1
        // 24     | 8     | BYTE_3

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        auto Offset = foldToImm(*Src1);
        if (!Offset)
          break;

        MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        auto Width = foldToImm(*Src2);
        if (!Width)
          break;

        SdwaSel SrcSel = DWORD;

        if (*Offset == 0 && *Width == 8)
          SrcSel = BYTE_0;
        else if (*Offset == 0 && *Width == 16)
          SrcSel = WORD_0;
        else if (*Offset == 0 && *Width == 32)
          SrcSel = DWORD;
        else if (*Offset == 8 && *Width == 8)
          SrcSel = BYTE_1;
        else if (*Offset == 16 && *Width == 8)
          SrcSel = BYTE_2;
        else if (*Offset == 16 && *Width == 16)
          SrcSel = WORD_1;
        else if (*Offset == 24 && *Width == 8)
          SrcSel = BYTE_3;
        else
          break;

        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src0->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        auto SDWASrc = make_unique<SDWASrcOperand>(
            Src0, Dst, SrcSel, false, false,
            Opcode == AMDGPU::V_BFE_U32 ? false : true);
        DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
        SDWAOperands[&MI] = std::move(SDWASrc);
        ++NumSDWAPatternsFound;
        break;
      }
      case AMDGPU::V_AND_B32_e32: {
        // e.g.:
        // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
        // to SDWA src:v0 src_sel:WORD_0/BYTE_0

        MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
        auto Imm = foldToImm(*Src0);
        if (!Imm)
          break;

        if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
          break;

        MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

        if (TRI->isPhysicalRegister(Src1->getReg()) ||
            TRI->isPhysicalRegister(Dst->getReg()))
          break;

        auto SDWASrc = make_unique<SDWASrcOperand>(
            Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
        DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
        SDWAOperands[&MI] = std::move(SDWASrc);
        ++NumSDWAPatternsFound;
        break;
      }
      }
    }
  }
}

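// Rebuild MI in its _sdwa form with neutral defaults (no modifiers, DWORD
// selects, no clamp), then let each matched SDWAOperand patch its part of the
// new instruction. MI is erased only if at least one operand converted.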
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                   const SDWAOperandsVector &SDWAOperands) {
  // Check if this instruction can be converted to SDWA:
  // 1. Does this opcode support SDWA?
  if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
    return false;

  // 2. Are all operands VGPRs?
  for (const MachineOperand &Operand : MI.explicit_operands()) {
    if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
      return false;
  }

  // Convert to SDWA.
  int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
  assert(SDWAOpcode != -1);

  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);

  // Create the SDWA version of instruction MI and initialize its operands.
  MachineInstrBuilder SDWAInst =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);

  // Copy dst; if it is present in the original then it should also be present
  // in the SDWA version.
  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  if (Dst) {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
    SDWAInst.add(*Dst);
  } else {
    assert(TII->isVOPC(MI));
  }

  // Copy src0 and initialize src0_modifiers. All SDWA instructions have src0
  // and src0_modifiers (except for v_nop_sdwa, but it can't get here).
  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  assert(
      Src0 &&
      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
  SDWAInst.addImm(0);
  SDWAInst.add(*Src0);

  // Copy src1 if present, and initialize src1_modifiers.
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  if (Src1) {
    assert(
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
    SDWAInst.addImm(0);
    SDWAInst.add(*Src1);
  } else {
    assert(TII->isVOP1(MI));
  }

  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
    // v_mac_f16/32 has an additional src2 operand tied to vdst.
    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    assert(Src2);
    SDWAInst.add(*Src2);
  }

  // Initialize clamp.
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
  SDWAInst.addImm(0);

  // Initialize dst_sel and dst_unused if present.
  if (Dst) {
    assert(
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
    SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
  }

  // Initialize src0_sel.
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
  SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);

  // Initialize src1_sel if present.
  if (Src1) {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
  }

  // Apply all SDWA operand patterns.
  bool Converted = false;
  for (auto &Operand : SDWAOperands) {
    Converted |= Operand->convertToSDWA(*SDWAInst, TII);
  }
  if (!Converted) {
    SDWAInst->eraseFromParent();
    return false;
  }

  DEBUG(dbgs() << "Convert instruction:" << MI
               << "Into:" << *SDWAInst << '\n');
  ++NumSDWAInstructionsPeepholed;

  MI.eraseFromParent();
  return true;
}

bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  if (!ST.hasSDWA() ||
      !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9
    return false;
  }

  MRI = &MF.getRegInfo();
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;

  matchSDWAOperands(MF);

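  // Group matched operands by the instruction they would convert, so that each
  // candidate instruction is rebuilt once with all of its SDWA operands.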
  for (auto &OperandPair : SDWAOperands) {
    auto &Operand = OperandPair.second;
    MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
    if (PotentialMI) {
      PotentialMatches[PotentialMI].push_back(std::move(Operand));
    }
  }

  for (auto &PotentialPair : PotentialMatches) {
    MachineInstr &PotentialMI = *PotentialPair.first;
    convertToSDWA(PotentialMI, PotentialPair.second);
  }

  SDWAOperands.clear();
  return false;
}