| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1 | //===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | /// \file This pass tries to apply several peephole SDWA patterns. | 
|  | 11 | /// | 
|  | 12 | /// E.g. original: | 
|  | 13 | ///   V_LSHRREV_B32_e32 %vreg0, 16, %vreg1 | 
|  | 14 | ///   V_ADD_I32_e32 %vreg2, %vreg0, %vreg3 | 
|  | 15 | ///   V_LSHLREV_B32_e32 %vreg4, 16, %vreg2 | 
|  | 16 | /// | 
|  | 17 | /// Replace: | 
|  | 18 | ///   V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3 | 
|  | 19 | ///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD | 
|  | 20 | /// | 
|  | 21 | //===----------------------------------------------------------------------===// | 
|  | 22 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 23 | #include "AMDGPU.h" | 
|  | 24 | #include "AMDGPUSubtarget.h" | 
|  | 25 | #include "SIDefines.h" | 
|  | 26 | #include "SIInstrInfo.h" | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 27 | #include "llvm/ADT/STLExtras.h" | 
| Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 28 | #include "llvm/ADT/Statistic.h" | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 29 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | 30 | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | 31 | #include <unordered_map> | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 32 | #include <unordered_set> | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 33 |  | 
|  | 34 | using namespace llvm; | 
|  | 35 |  | 
|  | 36 | #define DEBUG_TYPE "si-peephole-sdwa" | 
|  | 37 |  | 
|  | 38 | STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found."); | 
|  | 39 | STATISTIC(NumSDWAInstructionsPeepholed, | 
|  | 40 | "Number of instruction converted to SDWA."); | 
|  | 41 |  | 
|  | 42 | namespace { | 
|  | 43 |  | 
|  | 44 | class SDWAOperand; | 
|  | 45 |  | 
|  | 46 | class SIPeepholeSDWA : public MachineFunctionPass { | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 47 | public: | 
|  | 48 | typedef SmallVector<SDWAOperand *, 4> SDWAOperandsVector; | 
|  | 49 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 50 | private: | 
|  | 51 | MachineRegisterInfo *MRI; | 
|  | 52 | const SIRegisterInfo *TRI; | 
|  | 53 | const SIInstrInfo *TII; | 
|  | 54 |  | 
|  | 55 | std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands; | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 56 | std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches; | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 57 | SmallVector<MachineInstr *, 8> ConvertedInstructions; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 58 |  | 
| Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 59 | Optional<int64_t> foldToImm(const MachineOperand &Op) const; | 
|  | 60 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 61 | public: | 
|  | 62 | static char ID; | 
|  | 63 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 64 | SIPeepholeSDWA() : MachineFunctionPass(ID) { | 
|  | 65 | initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()); | 
|  | 66 | } | 
|  | 67 |  | 
|  | 68 | bool runOnMachineFunction(MachineFunction &MF) override; | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 69 | void matchSDWAOperands(MachineFunction &MF); | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 70 | bool isConvertibleToSDWA(const MachineInstr &MI) const; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 71 | bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 72 | void legalizeScalarOperands(MachineInstr &MI) const; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 73 |  | 
|  | 74 | StringRef getPassName() const override { return "SI Peephole SDWA"; } | 
|  | 75 |  | 
|  | 76 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | 77 | AU.setPreservesCFG(); | 
|  | 78 | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | 79 | } | 
|  | 80 | }; | 
|  | 81 |  | 
|  | 82 | class SDWAOperand { | 
|  | 83 | private: | 
|  | 84 | MachineOperand *Target; // Operand that would be used in converted instruction | 
|  | 85 | MachineOperand *Replaced; // Operand that would be replace by Target | 
|  | 86 |  | 
|  | 87 | public: | 
|  | 88 | SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp) | 
|  | 89 | : Target(TargetOp), Replaced(ReplacedOp) { | 
|  | 90 | assert(Target->isReg()); | 
|  | 91 | assert(Replaced->isReg()); | 
|  | 92 | } | 
|  | 93 |  | 
|  | 94 | virtual ~SDWAOperand() {} | 
|  | 95 |  | 
|  | 96 | virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0; | 
|  | 97 | virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0; | 
|  | 98 |  | 
|  | 99 | MachineOperand *getTargetOperand() const { return Target; } | 
|  | 100 | MachineOperand *getReplacedOperand() const { return Replaced; } | 
|  | 101 | MachineInstr *getParentInst() const { return Target->getParent(); } | 
|  | 102 | MachineRegisterInfo *getMRI() const { | 
|  | 103 | return &getParentInst()->getParent()->getParent()->getRegInfo(); | 
|  | 104 | } | 
|  | 105 | }; | 
|  | 106 |  | 
|  | 107 | using namespace AMDGPU::SDWA; | 
|  | 108 |  | 
|  | 109 | class SDWASrcOperand : public SDWAOperand { | 
|  | 110 | private: | 
|  | 111 | SdwaSel SrcSel; | 
|  | 112 | bool Abs; | 
|  | 113 | bool Neg; | 
|  | 114 | bool Sext; | 
|  | 115 |  | 
|  | 116 | public: | 
|  | 117 | SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, | 
|  | 118 | SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false, | 
|  | 119 | bool Sext_ = false) | 
|  | 120 | : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_), | 
|  | 121 | Neg(Neg_), Sext(Sext_) {} | 
|  | 122 |  | 
|  | 123 | virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; | 
|  | 124 | virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; | 
|  | 125 |  | 
|  | 126 | SdwaSel getSrcSel() const { return SrcSel; } | 
|  | 127 | bool getAbs() const { return Abs; } | 
|  | 128 | bool getNeg() const { return Neg; } | 
|  | 129 | bool getSext() const { return Sext; } | 
|  | 130 |  | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 131 | uint64_t getSrcMods(const SIInstrInfo *TII, | 
|  | 132 | const MachineOperand *SrcOp) const; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 133 | }; | 
|  | 134 |  | 
|  | 135 | class SDWADstOperand : public SDWAOperand { | 
|  | 136 | private: | 
|  | 137 | SdwaSel DstSel; | 
|  | 138 | DstUnused DstUn; | 
|  | 139 |  | 
|  | 140 | public: | 
|  | 141 | SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, | 
|  | 142 | SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) | 
|  | 143 | : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} | 
|  | 144 |  | 
|  | 145 | virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; | 
|  | 146 | virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; | 
|  | 147 |  | 
|  | 148 | SdwaSel getDstSel() const { return DstSel; } | 
|  | 149 | DstUnused getDstUnused() const { return DstUn; } | 
|  | 150 | }; | 
|  | 151 |  | 
|  | 152 | } // End anonymous namespace. | 
|  | 153 |  | 
|  | 154 | INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false) | 
|  | 155 |  | 
|  | 156 | char SIPeepholeSDWA::ID = 0; | 
|  | 157 |  | 
|  | 158 | char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID; | 
|  | 159 |  | 
|  | 160 | FunctionPass *llvm::createSIPeepholeSDWAPass() { | 
|  | 161 | return new SIPeepholeSDWA(); | 
|  | 162 | } | 
|  | 163 |  | 
|  | 164 | #ifndef NDEBUG | 
|  | 165 |  | 
|  | 166 | static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) { | 
|  | 167 | switch(Sel) { | 
|  | 168 | case BYTE_0: OS << "BYTE_0"; break; | 
|  | 169 | case BYTE_1: OS << "BYTE_1"; break; | 
|  | 170 | case BYTE_2: OS << "BYTE_2"; break; | 
|  | 171 | case BYTE_3: OS << "BYTE_3"; break; | 
|  | 172 | case WORD_0: OS << "WORD_0"; break; | 
|  | 173 | case WORD_1: OS << "WORD_1"; break; | 
|  | 174 | case DWORD:  OS << "DWORD"; break; | 
|  | 175 | } | 
|  | 176 | return OS; | 
|  | 177 | } | 
|  | 178 |  | 
|  | 179 | static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) { | 
|  | 180 | switch(Un) { | 
|  | 181 | case UNUSED_PAD: OS << "UNUSED_PAD"; break; | 
|  | 182 | case UNUSED_SEXT: OS << "UNUSED_SEXT"; break; | 
|  | 183 | case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break; | 
|  | 184 | } | 
|  | 185 | return OS; | 
|  | 186 | } | 
|  | 187 |  | 
|  | 188 | static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) { | 
|  | 189 | OS << "SDWA src: " << *Src.getTargetOperand() | 
|  | 190 | << " src_sel:" << Src.getSrcSel() | 
|  | 191 | << " abs:" << Src.getAbs() << " neg:" << Src.getNeg() | 
|  | 192 | << " sext:" << Src.getSext() << '\n'; | 
|  | 193 | return OS; | 
|  | 194 | } | 
|  | 195 |  | 
|  | 196 | static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) { | 
|  | 197 | OS << "SDWA dst: " << *Dst.getTargetOperand() | 
|  | 198 | << " dst_sel:" << Dst.getDstSel() | 
|  | 199 | << " dst_unused:" << Dst.getDstUnused() << '\n'; | 
|  | 200 | return OS; | 
|  | 201 | } | 
|  | 202 |  | 
|  | 203 | #endif | 
|  | 204 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 205 | static void copyRegOperand(MachineOperand &To, const MachineOperand &From) { | 
|  | 206 | assert(To.isReg() && From.isReg()); | 
|  | 207 | To.setReg(From.getReg()); | 
|  | 208 | To.setSubReg(From.getSubReg()); | 
|  | 209 | To.setIsUndef(From.isUndef()); | 
|  | 210 | if (To.isUse()) { | 
|  | 211 | To.setIsKill(From.isKill()); | 
|  | 212 | } else { | 
|  | 213 | To.setIsDead(From.isDead()); | 
|  | 214 | } | 
|  | 215 | } | 
|  | 216 |  | 
|  | 217 | static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { | 
|  | 218 | return LHS.isReg() && | 
|  | 219 | RHS.isReg() && | 
|  | 220 | LHS.getReg() == RHS.getReg() && | 
|  | 221 | LHS.getSubReg() == RHS.getSubReg(); | 
|  | 222 | } | 
|  | 223 |  | 
|  | 224 | static bool isSubregOf(const MachineOperand &SubReg, | 
|  | 225 | const MachineOperand &SuperReg, | 
|  | 226 | const TargetRegisterInfo *TRI) { | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 227 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 228 | if (!SuperReg.isReg() || !SubReg.isReg()) | 
|  | 229 | return false; | 
|  | 230 |  | 
|  | 231 | if (isSameReg(SuperReg, SubReg)) | 
|  | 232 | return true; | 
|  | 233 |  | 
|  | 234 | if (SuperReg.getReg() != SubReg.getReg()) | 
|  | 235 | return false; | 
|  | 236 |  | 
| Sam Kolton | 9fa1696 | 2017-04-06 15:03:28 +0000 | [diff] [blame] | 237 | LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg()); | 
|  | 238 | LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg()); | 
|  | 239 | SuperMask |= ~SubMask; | 
|  | 240 | return SuperMask.all(); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 241 | } | 
|  | 242 |  | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 243 | uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, | 
|  | 244 | const MachineOperand *SrcOp) const { | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 245 | uint64_t Mods = 0; | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 246 | const auto *MI = SrcOp->getParent(); | 
|  | 247 | if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { | 
|  | 248 | if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { | 
|  | 249 | Mods = Mod->getImm(); | 
|  | 250 | } | 
|  | 251 | } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { | 
|  | 252 | if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { | 
|  | 253 | Mods = Mod->getImm(); | 
|  | 254 | } | 
|  | 255 | } | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 256 | if (Abs || Neg) { | 
|  | 257 | assert(!Sext && | 
|  | 258 | "Float and integer src modifiers can't be set simulteniously"); | 
|  | 259 | Mods |= Abs ? SISrcMods::ABS : 0; | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 260 | Mods ^= Neg ? SISrcMods::NEG : 0; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 261 | } else if (Sext) { | 
|  | 262 | Mods |= SISrcMods::SEXT; | 
|  | 263 | } | 
|  | 264 |  | 
|  | 265 | return Mods; | 
|  | 266 | } | 
|  | 267 |  | 
|  | 268 | MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) { | 
|  | 269 | // For SDWA src operand potential instruction is one that use register | 
|  | 270 | // defined by parent instruction | 
|  | 271 | MachineRegisterInfo *MRI = getMRI(); | 
|  | 272 | MachineOperand *Replaced = getReplacedOperand(); | 
|  | 273 | assert(Replaced->isReg()); | 
|  | 274 |  | 
|  | 275 | MachineInstr *PotentialMI = nullptr; | 
|  | 276 | for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) { | 
|  | 277 | // If this is use of another subreg of dst reg then do nothing | 
|  | 278 | if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo())) | 
|  | 279 | continue; | 
|  | 280 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 281 | // If there exist use of superreg of dst then we should not combine this | 
|  | 282 | // opernad | 
|  | 283 | if (!isSameReg(PotentialMO, *Replaced)) | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 284 | return nullptr; | 
|  | 285 |  | 
|  | 286 | // Check that PotentialMI is only instruction that uses dst reg | 
|  | 287 | if (PotentialMI == nullptr) { | 
|  | 288 | PotentialMI = PotentialMO.getParent(); | 
|  | 289 | } else if (PotentialMI != PotentialMO.getParent()) { | 
|  | 290 | return nullptr; | 
|  | 291 | } | 
|  | 292 | } | 
|  | 293 |  | 
|  | 294 | return PotentialMI; | 
|  | 295 | } | 
|  | 296 |  | 
|  | 297 | bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { | 
|  | 298 | // Find operand in instruction that matches source operand and replace it with | 
|  | 299 | // target operand. Set corresponding src_sel | 
|  | 300 |  | 
|  | 301 | MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | 
|  | 302 | MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); | 
|  | 303 | MachineOperand *SrcMods = | 
|  | 304 | TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 305 | assert(Src && (Src->isReg() || Src->isImm())); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 306 | if (!isSameReg(*Src, *getReplacedOperand())) { | 
|  | 307 | // If this is not src0 then it should be src1 | 
|  | 308 | Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1); | 
|  | 309 | SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); | 
|  | 310 | SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); | 
|  | 311 |  | 
|  | 312 | assert(Src && Src->isReg()); | 
|  | 313 |  | 
|  | 314 | if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || | 
|  | 315 | MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && | 
|  | 316 | !isSameReg(*Src, *getReplacedOperand())) { | 
|  | 317 | // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to | 
|  | 318 | // src2. This is not allowed. | 
|  | 319 | return false; | 
|  | 320 | } | 
|  | 321 |  | 
|  | 322 | assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods); | 
|  | 323 | } | 
|  | 324 | copyRegOperand(*Src, *getTargetOperand()); | 
|  | 325 | SrcSel->setImm(getSrcSel()); | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 326 | SrcMods->setImm(getSrcMods(TII, Src)); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 327 | getTargetOperand()->setIsKill(false); | 
|  | 328 | return true; | 
|  | 329 | } | 
|  | 330 |  | 
|  | 331 | MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) { | 
|  | 332 | // For SDWA dst operand potential instruction is one that defines register | 
|  | 333 | // that this operand uses | 
|  | 334 | MachineRegisterInfo *MRI = getMRI(); | 
|  | 335 | MachineInstr *ParentMI = getParentInst(); | 
|  | 336 | MachineOperand *Replaced = getReplacedOperand(); | 
|  | 337 | assert(Replaced->isReg()); | 
|  | 338 |  | 
|  | 339 | for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) { | 
|  | 340 | if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo())) | 
|  | 341 | continue; | 
|  | 342 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 343 | if (!isSameReg(*Replaced, PotentialMO)) | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 344 | return nullptr; | 
|  | 345 |  | 
|  | 346 | // Check that ParentMI is the only instruction that uses replaced register | 
|  | 347 | for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) { | 
|  | 348 | if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) && | 
|  | 349 | UseMO.getParent() != ParentMI) { | 
|  | 350 | return nullptr; | 
|  | 351 | } | 
|  | 352 | } | 
|  | 353 |  | 
|  | 354 | // Due to SSA this should be onle def of replaced register, so return it | 
|  | 355 | return PotentialMO.getParent(); | 
|  | 356 | } | 
|  | 357 |  | 
|  | 358 | return nullptr; | 
|  | 359 | } | 
|  | 360 |  | 
|  | 361 | bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { | 
|  | 362 | // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused | 
|  | 363 |  | 
|  | 364 | if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || | 
|  | 365 | MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && | 
|  | 366 | getDstSel() != AMDGPU::SDWA::DWORD) { | 
|  | 367 | // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD | 
|  | 368 | return false; | 
|  | 369 | } | 
|  | 370 |  | 
|  | 371 | MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); | 
|  | 372 | assert(Operand && | 
|  | 373 | Operand->isReg() && | 
|  | 374 | isSameReg(*Operand, *getReplacedOperand())); | 
|  | 375 | copyRegOperand(*Operand, *getTargetOperand()); | 
|  | 376 | MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); | 
|  | 377 | assert(DstSel); | 
|  | 378 | DstSel->setImm(getDstSel()); | 
|  | 379 | MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); | 
|  | 380 | assert(DstUnused); | 
|  | 381 | DstUnused->setImm(getDstUnused()); | 
|  | 382 |  | 
|  | 383 | // Remove original instruction  because it would conflict with our new | 
|  | 384 | // instruction by register definition | 
|  | 385 | getParentInst()->eraseFromParent(); | 
|  | 386 | return true; | 
|  | 387 | } | 
|  | 388 |  | 
| Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 389 | Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const { | 
|  | 390 | if (Op.isImm()) { | 
|  | 391 | return Op.getImm(); | 
|  | 392 | } | 
|  | 393 |  | 
|  | 394 | // If this is not immediate then it can be copy of immediate value, e.g.: | 
|  | 395 | // %vreg1<def> = S_MOV_B32 255; | 
|  | 396 | if (Op.isReg()) { | 
|  | 397 | for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) { | 
|  | 398 | if (!isSameReg(Op, Def)) | 
|  | 399 | continue; | 
|  | 400 |  | 
|  | 401 | const MachineInstr *DefInst = Def.getParent(); | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 402 | if (!TII->isFoldableCopy(*DefInst)) | 
| Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 403 | return None; | 
|  | 404 |  | 
|  | 405 | const MachineOperand &Copied = DefInst->getOperand(1); | 
|  | 406 | if (!Copied.isImm()) | 
|  | 407 | return None; | 
|  | 408 |  | 
|  | 409 | return Copied.getImm(); | 
|  | 410 | } | 
|  | 411 | } | 
|  | 412 |  | 
|  | 413 | return None; | 
|  | 414 | } | 
|  | 415 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 416 | void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { | 
|  | 417 | for (MachineBasicBlock &MBB : MF) { | 
|  | 418 | for (MachineInstr &MI : MBB) { | 
|  | 419 | unsigned Opcode = MI.getOpcode(); | 
|  | 420 | switch (Opcode) { | 
|  | 421 | case AMDGPU::V_LSHRREV_B32_e32: | 
|  | 422 | case AMDGPU::V_ASHRREV_I32_e32: | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 423 | case AMDGPU::V_LSHLREV_B32_e32: | 
|  | 424 | case AMDGPU::V_LSHRREV_B32_e64: | 
|  | 425 | case AMDGPU::V_ASHRREV_I32_e64: | 
|  | 426 | case AMDGPU::V_LSHLREV_B32_e64: { | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 427 | // from: v_lshrrev_b32_e32 v1, 16/24, v0 | 
|  | 428 | // to SDWA src:v0 src_sel:WORD_1/BYTE_3 | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 429 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 430 | // from: v_ashrrev_i32_e32 v1, 16/24, v0 | 
|  | 431 | // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1 | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 432 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 433 | // from: v_lshlrev_b32_e32 v1, 16/24, v0 | 
|  | 434 | // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD | 
|  | 435 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | 
|  | 436 | auto Imm = foldToImm(*Src0); | 
|  | 437 | if (!Imm) | 
|  | 438 | break; | 
|  | 439 |  | 
|  | 440 | if (*Imm != 16 && *Imm != 24) | 
|  | 441 | break; | 
|  | 442 |  | 
|  | 443 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); | 
|  | 444 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); | 
|  | 445 | if (TRI->isPhysicalRegister(Src1->getReg()) || | 
|  | 446 | TRI->isPhysicalRegister(Dst->getReg())) | 
|  | 447 | break; | 
|  | 448 |  | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 449 | if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || | 
|  | 450 | Opcode == AMDGPU::V_LSHLREV_B32_e64) { | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 451 | auto SDWADst = make_unique<SDWADstOperand>( | 
|  | 452 | Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); | 
|  | 453 | DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); | 
|  | 454 | SDWAOperands[&MI] = std::move(SDWADst); | 
|  | 455 | ++NumSDWAPatternsFound; | 
|  | 456 | } else { | 
|  | 457 | auto SDWASrc = make_unique<SDWASrcOperand>( | 
|  | 458 | Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 459 | Opcode != AMDGPU::V_LSHRREV_B32_e32 && | 
|  | 460 | Opcode != AMDGPU::V_LSHRREV_B32_e64); | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 461 | DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); | 
|  | 462 | SDWAOperands[&MI] = std::move(SDWASrc); | 
|  | 463 | ++NumSDWAPatternsFound; | 
|  | 464 | } | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 465 | break; | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 466 | } | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 467 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 468 | case AMDGPU::V_LSHRREV_B16_e32: | 
|  | 469 | case AMDGPU::V_ASHRREV_I16_e32: | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 470 | case AMDGPU::V_LSHLREV_B16_e32: | 
|  | 471 | case AMDGPU::V_LSHRREV_B16_e64: | 
|  | 472 | case AMDGPU::V_ASHRREV_I16_e64: | 
|  | 473 | case AMDGPU::V_LSHLREV_B16_e64: { | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 474 | // from: v_lshrrev_b16_e32 v1, 8, v0 | 
|  | 475 | // to SDWA src:v0 src_sel:BYTE_1 | 
|  | 476 |  | 
|  | 477 | // from: v_ashrrev_i16_e32 v1, 8, v0 | 
|  | 478 | // to SDWA src:v0 src_sel:BYTE_1 sext:1 | 
|  | 479 |  | 
|  | 480 | // from: v_lshlrev_b16_e32 v1, 8, v0 | 
|  | 481 | // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD | 
|  | 482 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | 
|  | 483 | auto Imm = foldToImm(*Src0); | 
|  | 484 | if (!Imm || *Imm != 8) | 
|  | 485 | break; | 
|  | 486 |  | 
|  | 487 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); | 
|  | 488 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); | 
|  | 489 |  | 
|  | 490 | if (TRI->isPhysicalRegister(Src1->getReg()) || | 
|  | 491 | TRI->isPhysicalRegister(Dst->getReg())) | 
|  | 492 | break; | 
|  | 493 |  | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 494 | if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || | 
|  | 495 | Opcode == AMDGPU::V_LSHLREV_B16_e64) { | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 496 | auto SDWADst = | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 497 | make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD); | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 498 | DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n'); | 
|  | 499 | SDWAOperands[&MI] = std::move(SDWADst); | 
|  | 500 | ++NumSDWAPatternsFound; | 
|  | 501 | } else { | 
|  | 502 | auto SDWASrc = make_unique<SDWASrcOperand>( | 
|  | 503 | Src1, Dst, BYTE_1, false, false, | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 504 | Opcode != AMDGPU::V_LSHRREV_B16_e32 && | 
|  | 505 | Opcode != AMDGPU::V_LSHRREV_B16_e64); | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 506 | DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); | 
|  | 507 | SDWAOperands[&MI] = std::move(SDWASrc); | 
|  | 508 | ++NumSDWAPatternsFound; | 
|  | 509 | } | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 510 | break; | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 511 | } | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 512 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 513 | case AMDGPU::V_BFE_I32: | 
|  | 514 | case AMDGPU::V_BFE_U32: { | 
|  | 515 | // e.g.: | 
|  | 516 | // from: v_bfe_u32 v1, v0, 8, 8 | 
|  | 517 | // to SDWA src:v0 src_sel:BYTE_1 | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 518 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 519 | // offset | width | src_sel | 
|  | 520 | // ------------------------ | 
|  | 521 | // 0      | 8     | BYTE_0 | 
|  | 522 | // 0      | 16    | WORD_0 | 
|  | 523 | // 0      | 32    | DWORD ? | 
|  | 524 | // 8      | 8     | BYTE_1 | 
|  | 525 | // 16     | 8     | BYTE_2 | 
|  | 526 | // 16     | 16    | WORD_1 | 
|  | 527 | // 24     | 8     | BYTE_3 | 
|  | 528 |  | 
|  | 529 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); | 
|  | 530 | auto Offset = foldToImm(*Src1); | 
|  | 531 | if (!Offset) | 
|  | 532 | break; | 
|  | 533 |  | 
|  | 534 | MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); | 
|  | 535 | auto Width = foldToImm(*Src2); | 
|  | 536 | if (!Width) | 
|  | 537 | break; | 
|  | 538 |  | 
|  | 539 | SdwaSel SrcSel = DWORD; | 
|  | 540 |  | 
|  | 541 | if (*Offset == 0 && *Width == 8) | 
|  | 542 | SrcSel = BYTE_0; | 
|  | 543 | else if (*Offset == 0 && *Width == 16) | 
|  | 544 | SrcSel = WORD_0; | 
|  | 545 | else if (*Offset == 0 && *Width == 32) | 
|  | 546 | SrcSel = DWORD; | 
|  | 547 | else if (*Offset == 8 && *Width == 8) | 
|  | 548 | SrcSel = BYTE_1; | 
|  | 549 | else if (*Offset == 16 && *Width == 8) | 
|  | 550 | SrcSel = BYTE_2; | 
|  | 551 | else if (*Offset == 16 && *Width == 16) | 
|  | 552 | SrcSel = WORD_1; | 
|  | 553 | else if (*Offset == 24 && *Width == 8) | 
|  | 554 | SrcSel = BYTE_3; | 
|  | 555 | else | 
|  | 556 | break; | 
|  | 557 |  | 
|  | 558 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | 
|  | 559 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 560 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 561 | if (TRI->isPhysicalRegister(Src0->getReg()) || | 
|  | 562 | TRI->isPhysicalRegister(Dst->getReg())) | 
|  | 563 | break; | 
|  | 564 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 565 | auto SDWASrc = make_unique<SDWASrcOperand>( | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 566 | Src0, Dst, SrcSel, false, false, | 
|  | 567 | Opcode == AMDGPU::V_BFE_U32 ? false : true); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 568 | DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); | 
|  | 569 | SDWAOperands[&MI] = std::move(SDWASrc); | 
|  | 570 | ++NumSDWAPatternsFound; | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 571 | break; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 572 | } | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 573 | case AMDGPU::V_AND_B32_e32: | 
|  | 574 | case AMDGPU::V_AND_B32_e64: { | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 575 | // e.g.: | 
|  | 576 | // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 | 
|  | 577 | // to SDWA src:v0 src_sel:WORD_0/BYTE_0 | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 578 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 579 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 580 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 581 | auto ValSrc = Src1; | 
|  | 582 | auto Imm = foldToImm(*Src0); | 
|  | 583 |  | 
|  | 584 | if (!Imm) { | 
|  | 585 | Imm = foldToImm(*Src1); | 
|  | 586 | ValSrc = Src0; | 
|  | 587 | } | 
|  | 588 |  | 
|  | 589 | if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) | 
|  | 590 | break; | 
|  | 591 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 592 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 593 |  | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 594 | if (TRI->isPhysicalRegister(Src1->getReg()) || | 
|  | 595 | TRI->isPhysicalRegister(Dst->getReg())) | 
|  | 596 | break; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 597 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 598 | auto SDWASrc = make_unique<SDWASrcOperand>( | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 599 | ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 600 | DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n'); | 
|  | 601 | SDWAOperands[&MI] = std::move(SDWASrc); | 
|  | 602 | ++NumSDWAPatternsFound; | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 603 | break; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 604 | } | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 605 | } | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 606 | } | 
|  | 607 | } | 
|  | 608 | } | 
|  | 609 |  | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 610 | bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const { | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 611 | // Check if this instruction has opcode that supports SDWA | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 612 | unsigned Opc = MI.getOpcode(); | 
|  | 613 | if (AMDGPU::getSDWAOp(Opc) != -1) | 
|  | 614 | return true; | 
|  | 615 | int Opc32 = AMDGPU::getVOPe32(Opc); | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 616 | if (Opc32 != -1 && AMDGPU::getSDWAOp(Opc32) != -1) { | 
|  | 617 | if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) | 
|  | 618 | return false; | 
|  | 619 |  | 
|  | 620 | if (TII->isVOPC(Opc)) { | 
|  | 621 | const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); | 
|  | 622 | return SDst && SDst->getReg() == AMDGPU::VCC; | 
|  | 623 | } else { | 
|  | 624 | return !TII->getNamedOperand(MI, AMDGPU::OpName::sdst); | 
|  | 625 | } | 
|  | 626 | } | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 627 | return false; | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 628 | } | 
|  | 629 |  | 
|  | 630 | bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, | 
|  | 631 | const SDWAOperandsVector &SDWAOperands) { | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 632 | // Convert to sdwa | 
|  | 633 | int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode()); | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 634 | if (SDWAOpcode == -1) | 
|  | 635 | SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode())); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 636 | assert(SDWAOpcode != -1); | 
|  | 637 |  | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 638 | // Copy dst, if it is present in original then should also be present in SDWA | 
|  | 639 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); | 
|  | 640 | if (!Dst && !TII->isVOPC(MI)) | 
|  | 641 | return false; | 
|  | 642 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 643 | const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); | 
|  | 644 |  | 
|  | 645 | // Create SDWA version of instruction MI and initialize its operands | 
|  | 646 | MachineInstrBuilder SDWAInst = | 
|  | 647 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); | 
|  | 648 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 649 | if (Dst) { | 
|  | 650 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); | 
|  | 651 | SDWAInst.add(*Dst); | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 652 | } else { | 
|  | 653 | Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); | 
|  | 654 | assert(Dst && | 
|  | 655 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); | 
|  | 656 | SDWAInst.add(*Dst); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 657 | } | 
|  | 658 |  | 
|  | 659 | // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and | 
|  | 660 | // src0_modifiers (except for v_nop_sdwa, but it can't get here) | 
|  | 661 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); | 
|  | 662 | assert( | 
|  | 663 | Src0 && | 
|  | 664 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && | 
|  | 665 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 666 | if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) | 
|  | 667 | SDWAInst.addImm(Mod->getImm()); | 
|  | 668 | else | 
|  | 669 | SDWAInst.addImm(0); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 670 | SDWAInst.add(*Src0); | 
|  | 671 |  | 
|  | 672 | // Copy src1 if present, initialize src1_modifiers. | 
|  | 673 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); | 
|  | 674 | if (Src1) { | 
|  | 675 | assert( | 
|  | 676 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && | 
|  | 677 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); | 
| Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 678 | if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) | 
|  | 679 | SDWAInst.addImm(Mod->getImm()); | 
|  | 680 | else | 
|  | 681 | SDWAInst.addImm(0); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 682 | SDWAInst.add(*Src1); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 683 | } | 
|  | 684 |  | 
|  | 685 | if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || | 
|  | 686 | SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) { | 
|  | 687 | // v_mac_f16/32 has additional src2 operand tied to vdst | 
|  | 688 | MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); | 
|  | 689 | assert(Src2); | 
|  | 690 | SDWAInst.add(*Src2); | 
|  | 691 | } | 
|  | 692 |  | 
|  | 693 | // Initialize clamp. | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 694 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1) | 
|  | 695 | SDWAInst.addImm(0); | 
|  | 696 |  | 
|  | 697 | // Initialize omod. | 
|  | 698 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) | 
|  | 699 | SDWAInst.addImm(0); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 700 |  | 
|  | 701 | // Initialize dst_sel and dst_unused if present | 
|  | 702 | if (Dst) { | 
|  | 703 | assert( | 
|  | 704 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 && | 
|  | 705 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1); | 
|  | 706 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); | 
|  | 707 | SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); | 
|  | 708 | } | 
|  | 709 |  | 
|  | 710 | // Initialize src0_sel | 
|  | 711 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); | 
|  | 712 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); | 
|  | 713 |  | 
|  | 714 |  | 
|  | 715 | // Initialize src1_sel if present | 
|  | 716 | if (Src1) { | 
|  | 717 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); | 
|  | 718 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); | 
|  | 719 | } | 
|  | 720 |  | 
|  | 721 | // Apply all sdwa operand pattenrs | 
|  | 722 | bool Converted = false; | 
|  | 723 | for (auto &Operand : SDWAOperands) { | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 724 | // There should be no intesection between SDWA operands and potential MIs | 
|  | 725 | // e.g.: | 
|  | 726 | // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 | 
|  | 727 | // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0 | 
|  | 728 | // v_add_u32 v3, v4, v2 | 
|  | 729 | // | 
|  | 730 | // In that example it is possible that we would fold 2nd instruction into 3rd | 
|  | 731 | // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was | 
|  | 732 | // already destroyed). So if SDWAOperand is also a potential MI then do not | 
|  | 733 | // apply it. | 
|  | 734 | if (PotentialMatches.count(Operand->getParentInst()) == 0) | 
|  | 735 | Converted |= Operand->convertToSDWA(*SDWAInst, TII); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 736 | } | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 737 | if (Converted) { | 
|  | 738 | ConvertedInstructions.push_back(SDWAInst); | 
|  | 739 | } else { | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 740 | SDWAInst->eraseFromParent(); | 
|  | 741 | return false; | 
|  | 742 | } | 
|  | 743 |  | 
|  | 744 | DEBUG(dbgs() << "Convert instruction:" << MI | 
|  | 745 | << "Into:" << *SDWAInst << '\n'); | 
|  | 746 | ++NumSDWAInstructionsPeepholed; | 
|  | 747 |  | 
|  | 748 | MI.eraseFromParent(); | 
|  | 749 | return true; | 
|  | 750 | } | 
|  | 751 |  | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 752 | // If an instruction was converted to SDWA it should not have immediates or SGPR | 
|  | 753 | // operands. Copy its scalar operands into VGPRs. | 
|  | 754 | void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const { | 
|  | 755 | const MCInstrDesc &Desc = TII->get(MI.getOpcode()); | 
|  | 756 | for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { | 
|  | 757 | MachineOperand &Op = MI.getOperand(I); | 
|  | 758 | if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) | 
|  | 759 | continue; | 
|  | 760 | if (Desc.OpInfo[I].RegClass == -1 || | 
|  | 761 | !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) | 
|  | 762 | continue; | 
|  | 763 | unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); | 
|  | 764 | auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), | 
|  | 765 | TII->get(AMDGPU::V_MOV_B32_e32), VGPR); | 
|  | 766 | if (Op.isImm()) | 
|  | 767 | Copy.addImm(Op.getImm()); | 
|  | 768 | else if (Op.isReg()) | 
|  | 769 | Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0, | 
|  | 770 | Op.getSubReg()); | 
|  | 771 | Op.ChangeToRegister(VGPR, false); | 
|  | 772 | } | 
|  | 773 | } | 
|  | 774 |  | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 775 | bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { | 
|  | 776 | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); | 
|  | 777 |  | 
|  | 778 | if (!ST.hasSDWA() || | 
|  | 779 | !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9 | 
|  | 780 | return false; | 
|  | 781 | } | 
|  | 782 |  | 
|  | 783 | MRI = &MF.getRegInfo(); | 
|  | 784 | TRI = ST.getRegisterInfo(); | 
|  | 785 | TII = ST.getInstrInfo(); | 
| Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 786 |  | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 787 | // Find all SDWA operands in MF. | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 788 | matchSDWAOperands(MF); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 789 |  | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 790 | for (const auto &OperandPair : SDWAOperands) { | 
|  | 791 | const auto &Operand = OperandPair.second; | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 792 | MachineInstr *PotentialMI = Operand->potentialToConvert(TII); | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 793 | if (PotentialMI && isConvertibleToSDWA(*PotentialMI)) { | 
|  | 794 | PotentialMatches[PotentialMI].push_back(Operand.get()); | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 795 | } | 
|  | 796 | } | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 797 |  | 
|  | 798 | for (auto &PotentialPair : PotentialMatches) { | 
|  | 799 | MachineInstr &PotentialMI = *PotentialPair.first; | 
|  | 800 | convertToSDWA(PotentialMI, PotentialPair.second); | 
|  | 801 | } | 
|  | 802 |  | 
| Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 803 | PotentialMatches.clear(); | 
| Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 804 | SDWAOperands.clear(); | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 805 |  | 
| Stanislav Mekhanoshin | e4cda74 | 2017-06-06 16:42:30 +0000 | [diff] [blame] | 806 | bool Ret = !ConvertedInstructions.empty(); | 
| Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 807 | while (!ConvertedInstructions.empty()) | 
|  | 808 | legalizeScalarOperands(*ConvertedInstructions.pop_back_val()); | 
|  | 809 |  | 
| Stanislav Mekhanoshin | e4cda74 | 2017-06-06 16:42:30 +0000 | [diff] [blame] | 810 | return Ret; | 
| Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 811 | } |