//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass tries to apply several peephole SDWA patterns.
///
/// E.g. original:
///   V_LSHRREV_B32_e32 %0, 16, %1
///   V_ADD_I32_e32 %2, %0, %3
///   V_LSHLREV_B32_e32 %4, 16, %2
///
/// Replace:
///   V_ADD_I32_sdwa %4, %1, %3
///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//
| 22 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 23 | #include "AMDGPU.h" |
| 24 | #include "AMDGPUSubtarget.h" |
| 25 | #include "SIDefines.h" |
| 26 | #include "SIInstrInfo.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 27 | #include "SIRegisterInfo.h" |
| 28 | #include "Utils/AMDGPUBaseInfo.h" |
| 29 | #include "llvm/ADT/None.h" |
| 30 | #include "llvm/ADT/Optional.h" |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 31 | #include "llvm/ADT/STLExtras.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 32 | #include "llvm/ADT/SmallVector.h" |
Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 33 | #include "llvm/ADT/Statistic.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 34 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 35 | #include "llvm/CodeGen/MachineFunction.h" |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 36 | #include "llvm/CodeGen/MachineFunctionPass.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 37 | #include "llvm/CodeGen/MachineInstr.h" |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 38 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 39 | #include "llvm/CodeGen/MachineOperand.h" |
| 40 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
David Blaikie | b3bde2e | 2017-11-17 01:07:10 +0000 | [diff] [blame] | 41 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 42 | #include "llvm/MC/LaneBitmask.h" |
| 43 | #include "llvm/MC/MCInstrDesc.h" |
| 44 | #include "llvm/Pass.h" |
| 45 | #include "llvm/Support/Debug.h" |
| 46 | #include "llvm/Support/raw_ostream.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 47 | #include <algorithm> |
| 48 | #include <cassert> |
| 49 | #include <cstdint> |
| 50 | #include <memory> |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 51 | #include <unordered_map> |
| 52 | |
| 53 | using namespace llvm; |
| 54 | |
| 55 | #define DEBUG_TYPE "si-peephole-sdwa" |
| 56 | |
| 57 | STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found."); |
| 58 | STATISTIC(NumSDWAInstructionsPeepholed, |
| 59 | "Number of instruction converted to SDWA."); |
| 60 | |
| 61 | namespace { |
| 62 | |
| 63 | class SDWAOperand; |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 64 | class SDWADstOperand; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 65 | |
| 66 | class SIPeepholeSDWA : public MachineFunctionPass { |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 67 | public: |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 68 | using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>; |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 69 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 70 | private: |
| 71 | MachineRegisterInfo *MRI; |
| 72 | const SIRegisterInfo *TRI; |
| 73 | const SIInstrInfo *TII; |
| 74 | |
| 75 | std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands; |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 76 | std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches; |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 77 | SmallVector<MachineInstr *, 8> ConvertedInstructions; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 78 | |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 79 | Optional<int64_t> foldToImm(const MachineOperand &Op) const; |
| 80 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 81 | public: |
| 82 | static char ID; |
| 83 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 84 | SIPeepholeSDWA() : MachineFunctionPass(ID) { |
| 85 | initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()); |
| 86 | } |
| 87 | |
| 88 | bool runOnMachineFunction(MachineFunction &MF) override; |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 89 | void matchSDWAOperands(MachineFunction &MF); |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 90 | std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 91 | bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 92 | bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 93 | void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 94 | |
| 95 | StringRef getPassName() const override { return "SI Peephole SDWA"; } |
| 96 | |
| 97 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 98 | AU.setPreservesCFG(); |
| 99 | MachineFunctionPass::getAnalysisUsage(AU); |
| 100 | } |
| 101 | }; |
| 102 | |
| 103 | class SDWAOperand { |
| 104 | private: |
| 105 | MachineOperand *Target; // Operand that would be used in converted instruction |
| 106 | MachineOperand *Replaced; // Operand that would be replace by Target |
| 107 | |
| 108 | public: |
| 109 | SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp) |
| 110 | : Target(TargetOp), Replaced(ReplacedOp) { |
| 111 | assert(Target->isReg()); |
| 112 | assert(Replaced->isReg()); |
| 113 | } |
| 114 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 115 | virtual ~SDWAOperand() = default; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 116 | |
| 117 | virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0; |
| 118 | virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0; |
| 119 | |
| 120 | MachineOperand *getTargetOperand() const { return Target; } |
| 121 | MachineOperand *getReplacedOperand() const { return Replaced; } |
| 122 | MachineInstr *getParentInst() const { return Target->getParent(); } |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 123 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 124 | MachineRegisterInfo *getMRI() const { |
| 125 | return &getParentInst()->getParent()->getParent()->getRegInfo(); |
| 126 | } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 127 | |
| 128 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 129 | virtual void print(raw_ostream& OS) const = 0; |
| 130 | void dump() const { print(dbgs()); } |
| 131 | #endif |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 132 | }; |
| 133 | |
| 134 | using namespace AMDGPU::SDWA; |
| 135 | |
| 136 | class SDWASrcOperand : public SDWAOperand { |
| 137 | private: |
| 138 | SdwaSel SrcSel; |
| 139 | bool Abs; |
| 140 | bool Neg; |
| 141 | bool Sext; |
| 142 | |
| 143 | public: |
| 144 | SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, |
| 145 | SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false, |
| 146 | bool Sext_ = false) |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 147 | : SDWAOperand(TargetOp, ReplacedOp), |
| 148 | SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {} |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 149 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 150 | MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; |
| 151 | bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 152 | |
| 153 | SdwaSel getSrcSel() const { return SrcSel; } |
| 154 | bool getAbs() const { return Abs; } |
| 155 | bool getNeg() const { return Neg; } |
| 156 | bool getSext() const { return Sext; } |
| 157 | |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 158 | uint64_t getSrcMods(const SIInstrInfo *TII, |
| 159 | const MachineOperand *SrcOp) const; |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 160 | |
| 161 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 162 | void print(raw_ostream& OS) const override; |
| 163 | #endif |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 164 | }; |
| 165 | |
| 166 | class SDWADstOperand : public SDWAOperand { |
| 167 | private: |
| 168 | SdwaSel DstSel; |
| 169 | DstUnused DstUn; |
| 170 | |
| 171 | public: |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 172 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 173 | SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, |
| 174 | SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 175 | : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 176 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 177 | MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; |
| 178 | bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 179 | |
| 180 | SdwaSel getDstSel() const { return DstSel; } |
| 181 | DstUnused getDstUnused() const { return DstUn; } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 182 | |
| 183 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 184 | void print(raw_ostream& OS) const override; |
| 185 | #endif |
| 186 | }; |
| 187 | |
| 188 | class SDWADstPreserveOperand : public SDWADstOperand { |
| 189 | private: |
| 190 | MachineOperand *Preserve; |
| 191 | |
| 192 | public: |
| 193 | SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, |
| 194 | MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD) |
| 195 | : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE), |
| 196 | Preserve(PreserveOp) {} |
| 197 | |
| 198 | bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; |
| 199 | |
| 200 | MachineOperand *getPreservedOperand() const { return Preserve; } |
| 201 | |
| 202 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 203 | void print(raw_ostream& OS) const override; |
| 204 | #endif |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 205 | }; |
| 206 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 207 | } // end anonymous namespace |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 208 | |
| 209 | INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false) |
| 210 | |
| 211 | char SIPeepholeSDWA::ID = 0; |
| 212 | |
| 213 | char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID; |
| 214 | |
| 215 | FunctionPass *llvm::createSIPeepholeSDWAPass() { |
| 216 | return new SIPeepholeSDWA(); |
| 217 | } |
| 218 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 219 | |
| 220 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 221 | static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) { |
| 222 | switch(Sel) { |
| 223 | case BYTE_0: OS << "BYTE_0"; break; |
| 224 | case BYTE_1: OS << "BYTE_1"; break; |
| 225 | case BYTE_2: OS << "BYTE_2"; break; |
| 226 | case BYTE_3: OS << "BYTE_3"; break; |
| 227 | case WORD_0: OS << "WORD_0"; break; |
| 228 | case WORD_1: OS << "WORD_1"; break; |
| 229 | case DWORD: OS << "DWORD"; break; |
| 230 | } |
| 231 | return OS; |
| 232 | } |
| 233 | |
| 234 | static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) { |
| 235 | switch(Un) { |
| 236 | case UNUSED_PAD: OS << "UNUSED_PAD"; break; |
| 237 | case UNUSED_SEXT: OS << "UNUSED_SEXT"; break; |
| 238 | case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break; |
| 239 | } |
| 240 | return OS; |
| 241 | } |
| 242 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 243 | static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) { |
| 244 | Operand.print(OS); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 245 | return OS; |
| 246 | } |
| 247 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 248 | LLVM_DUMP_METHOD |
| 249 | void SDWASrcOperand::print(raw_ostream& OS) const { |
| 250 | OS << "SDWA src: " << *getTargetOperand() |
| 251 | << " src_sel:" << getSrcSel() |
| 252 | << " abs:" << getAbs() << " neg:" << getNeg() |
| 253 | << " sext:" << getSext() << '\n'; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 254 | } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 255 | |
| 256 | LLVM_DUMP_METHOD |
| 257 | void SDWADstOperand::print(raw_ostream& OS) const { |
| 258 | OS << "SDWA dst: " << *getTargetOperand() |
| 259 | << " dst_sel:" << getDstSel() |
| 260 | << " dst_unused:" << getDstUnused() << '\n'; |
| 261 | } |
| 262 | |
| 263 | LLVM_DUMP_METHOD |
| 264 | void SDWADstPreserveOperand::print(raw_ostream& OS) const { |
| 265 | OS << "SDWA preserve dst: " << *getTargetOperand() |
| 266 | << " dst_sel:" << getDstSel() |
| 267 | << " preserve:" << *getPreservedOperand() << '\n'; |
| 268 | } |
| 269 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 270 | #endif |
| 271 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 272 | static void copyRegOperand(MachineOperand &To, const MachineOperand &From) { |
| 273 | assert(To.isReg() && From.isReg()); |
| 274 | To.setReg(From.getReg()); |
| 275 | To.setSubReg(From.getSubReg()); |
| 276 | To.setIsUndef(From.isUndef()); |
| 277 | if (To.isUse()) { |
| 278 | To.setIsKill(From.isKill()); |
| 279 | } else { |
| 280 | To.setIsDead(From.isDead()); |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { |
| 285 | return LHS.isReg() && |
| 286 | RHS.isReg() && |
| 287 | LHS.getReg() == RHS.getReg() && |
| 288 | LHS.getSubReg() == RHS.getSubReg(); |
| 289 | } |
| 290 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 291 | static MachineOperand *findSingleRegUse(const MachineOperand *Reg, |
| 292 | const MachineRegisterInfo *MRI) { |
| 293 | if (!Reg->isReg() || !Reg->isDef()) |
| 294 | return nullptr; |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 295 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 296 | MachineOperand *ResMO = nullptr; |
| 297 | for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) { |
| 298 | // If there exist use of subreg of Reg then return nullptr |
| 299 | if (!isSameReg(UseMO, *Reg)) |
| 300 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 301 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 302 | // Check that there is only one instruction that uses Reg |
| 303 | if (!ResMO) { |
| 304 | ResMO = &UseMO; |
| 305 | } else if (ResMO->getParent() != UseMO.getParent()) { |
| 306 | return nullptr; |
| 307 | } |
| 308 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 309 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 310 | return ResMO; |
| 311 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 312 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 313 | static MachineOperand *findSingleRegDef(const MachineOperand *Reg, |
| 314 | const MachineRegisterInfo *MRI) { |
| 315 | if (!Reg->isReg()) |
| 316 | return nullptr; |
| 317 | |
| 318 | MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg()); |
| 319 | if (!DefInstr) |
| 320 | return nullptr; |
| 321 | |
| 322 | for (auto &DefMO : DefInstr->defs()) { |
| 323 | if (DefMO.isReg() && DefMO.getReg() == Reg->getReg()) |
| 324 | return &DefMO; |
| 325 | } |
| 326 | |
Matt Arsenault | 8ae38bc | 2017-12-05 20:32:01 +0000 | [diff] [blame] | 327 | // Ignore implicit defs. |
| 328 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 329 | } |
| 330 | |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 331 | uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, |
| 332 | const MachineOperand *SrcOp) const { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 333 | uint64_t Mods = 0; |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 334 | const auto *MI = SrcOp->getParent(); |
| 335 | if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { |
| 336 | if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { |
| 337 | Mods = Mod->getImm(); |
| 338 | } |
| 339 | } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { |
| 340 | if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { |
| 341 | Mods = Mod->getImm(); |
| 342 | } |
| 343 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 344 | if (Abs || Neg) { |
| 345 | assert(!Sext && |
| 346 | "Float and integer src modifiers can't be set simulteniously"); |
| 347 | Mods |= Abs ? SISrcMods::ABS : 0; |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 348 | Mods ^= Neg ? SISrcMods::NEG : 0; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 349 | } else if (Sext) { |
| 350 | Mods |= SISrcMods::SEXT; |
| 351 | } |
| 352 | |
| 353 | return Mods; |
| 354 | } |
| 355 | |
| 356 | MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) { |
| 357 | // For SDWA src operand potential instruction is one that use register |
| 358 | // defined by parent instruction |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 359 | MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI()); |
| 360 | if (!PotentialMO) |
| 361 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 362 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 363 | return PotentialMO->getParent(); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 364 | } |
| 365 | |
| 366 | bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { |
| 367 | // Find operand in instruction that matches source operand and replace it with |
| 368 | // target operand. Set corresponding src_sel |
| 369 | |
| 370 | MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 371 | MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); |
| 372 | MachineOperand *SrcMods = |
| 373 | TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 374 | assert(Src && (Src->isReg() || Src->isImm())); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 375 | if (!isSameReg(*Src, *getReplacedOperand())) { |
| 376 | // If this is not src0 then it should be src1 |
| 377 | Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 378 | SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); |
| 379 | SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); |
| 380 | |
| 381 | assert(Src && Src->isReg()); |
| 382 | |
| 383 | if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || |
| 384 | MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 385 | !isSameReg(*Src, *getReplacedOperand())) { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 386 | // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to |
| 387 | // src2. This is not allowed. |
| 388 | return false; |
| 389 | } |
| 390 | |
| 391 | assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods); |
| 392 | } |
| 393 | copyRegOperand(*Src, *getTargetOperand()); |
| 394 | SrcSel->setImm(getSrcSel()); |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 395 | SrcMods->setImm(getSrcMods(TII, Src)); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 396 | getTargetOperand()->setIsKill(false); |
| 397 | return true; |
| 398 | } |
| 399 | |
| 400 | MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) { |
| 401 | // For SDWA dst operand potential instruction is one that defines register |
| 402 | // that this operand uses |
| 403 | MachineRegisterInfo *MRI = getMRI(); |
| 404 | MachineInstr *ParentMI = getParentInst(); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 405 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 406 | MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI); |
| 407 | if (!PotentialMO) |
| 408 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 409 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 410 | // Check that ParentMI is the only instruction that uses replaced register |
| 411 | for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) { |
| 412 | if (&UseInst != ParentMI) |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 413 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 414 | } |
| 415 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 416 | return PotentialMO->getParent(); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 417 | } |
| 418 | |
| 419 | bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { |
| 420 | // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused |
| 421 | |
| 422 | if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || |
| 423 | MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && |
| 424 | getDstSel() != AMDGPU::SDWA::DWORD) { |
| 425 | // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD |
| 426 | return false; |
| 427 | } |
| 428 | |
| 429 | MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 430 | assert(Operand && |
| 431 | Operand->isReg() && |
| 432 | isSameReg(*Operand, *getReplacedOperand())); |
| 433 | copyRegOperand(*Operand, *getTargetOperand()); |
| 434 | MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); |
| 435 | assert(DstSel); |
| 436 | DstSel->setImm(getDstSel()); |
| 437 | MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
| 438 | assert(DstUnused); |
| 439 | DstUnused->setImm(getDstUnused()); |
| 440 | |
| 441 | // Remove original instruction because it would conflict with our new |
| 442 | // instruction by register definition |
| 443 | getParentInst()->eraseFromParent(); |
| 444 | return true; |
| 445 | } |
| 446 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 447 | bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI, |
| 448 | const SIInstrInfo *TII) { |
| 449 | // MI should be moved right before v_or_b32. |
| 450 | // For this we should clear all kill flags on uses of MI src-operands or else |
| 451 | // we can encounter problem with use of killed operand. |
| 452 | for (MachineOperand &MO : MI.uses()) { |
| 453 | if (!MO.isReg()) |
| 454 | continue; |
| 455 | getMRI()->clearKillFlags(MO.getReg()); |
| 456 | } |
| 457 | |
| 458 | // Move MI before v_or_b32 |
| 459 | auto MBB = MI.getParent(); |
| 460 | MBB->remove(&MI); |
| 461 | MBB->insert(getParentInst(), &MI); |
| 462 | |
| 463 | // Add Implicit use of preserved register |
| 464 | MachineInstrBuilder MIB(*MBB->getParent(), MI); |
| 465 | MIB.addReg(getPreservedOperand()->getReg(), |
| 466 | RegState::ImplicitKill, |
| 467 | getPreservedOperand()->getSubReg()); |
| 468 | |
| 469 | // Tie dst to implicit use |
| 470 | MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst), |
| 471 | MI.getNumOperands() - 1); |
| 472 | |
| 473 | // Convert MI as any other SDWADstOperand and remove v_or_b32 |
| 474 | return SDWADstOperand::convertToSDWA(MI, TII); |
| 475 | } |
| 476 | |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 477 | Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const { |
| 478 | if (Op.isImm()) { |
| 479 | return Op.getImm(); |
| 480 | } |
| 481 | |
| 482 | // If this is not immediate then it can be copy of immediate value, e.g.: |
Francis Visoiu Mistrih | a8a83d1 | 2017-12-07 10:40:31 +0000 | [diff] [blame^] | 483 | // %1 = S_MOV_B32 255; |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 484 | if (Op.isReg()) { |
| 485 | for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) { |
| 486 | if (!isSameReg(Op, Def)) |
| 487 | continue; |
| 488 | |
| 489 | const MachineInstr *DefInst = Def.getParent(); |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 490 | if (!TII->isFoldableCopy(*DefInst)) |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 491 | return None; |
| 492 | |
| 493 | const MachineOperand &Copied = DefInst->getOperand(1); |
| 494 | if (!Copied.isImm()) |
| 495 | return None; |
| 496 | |
| 497 | return Copied.getImm(); |
| 498 | } |
| 499 | } |
| 500 | |
| 501 | return None; |
| 502 | } |
| 503 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 504 | std::unique_ptr<SDWAOperand> |
| 505 | SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { |
| 506 | unsigned Opcode = MI.getOpcode(); |
| 507 | switch (Opcode) { |
| 508 | case AMDGPU::V_LSHRREV_B32_e32: |
| 509 | case AMDGPU::V_ASHRREV_I32_e32: |
| 510 | case AMDGPU::V_LSHLREV_B32_e32: |
| 511 | case AMDGPU::V_LSHRREV_B32_e64: |
| 512 | case AMDGPU::V_ASHRREV_I32_e64: |
| 513 | case AMDGPU::V_LSHLREV_B32_e64: { |
| 514 | // from: v_lshrrev_b32_e32 v1, 16/24, v0 |
| 515 | // to SDWA src:v0 src_sel:WORD_1/BYTE_3 |
| 516 | |
| 517 | // from: v_ashrrev_i32_e32 v1, 16/24, v0 |
| 518 | // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1 |
| 519 | |
| 520 | // from: v_lshlrev_b32_e32 v1, 16/24, v0 |
| 521 | // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD |
| 522 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 523 | auto Imm = foldToImm(*Src0); |
| 524 | if (!Imm) |
| 525 | break; |
| 526 | |
| 527 | if (*Imm != 16 && *Imm != 24) |
| 528 | break; |
| 529 | |
| 530 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 531 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 532 | if (TRI->isPhysicalRegister(Src1->getReg()) || |
| 533 | TRI->isPhysicalRegister(Dst->getReg())) |
| 534 | break; |
| 535 | |
| 536 | if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || |
| 537 | Opcode == AMDGPU::V_LSHLREV_B32_e64) { |
| 538 | return make_unique<SDWADstOperand>( |
| 539 | Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); |
| 540 | } else { |
| 541 | return make_unique<SDWASrcOperand>( |
| 542 | Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, |
| 543 | Opcode != AMDGPU::V_LSHRREV_B32_e32 && |
| 544 | Opcode != AMDGPU::V_LSHRREV_B32_e64); |
| 545 | } |
| 546 | break; |
| 547 | } |
| 548 | |
| 549 | case AMDGPU::V_LSHRREV_B16_e32: |
| 550 | case AMDGPU::V_ASHRREV_I16_e32: |
| 551 | case AMDGPU::V_LSHLREV_B16_e32: |
| 552 | case AMDGPU::V_LSHRREV_B16_e64: |
| 553 | case AMDGPU::V_ASHRREV_I16_e64: |
| 554 | case AMDGPU::V_LSHLREV_B16_e64: { |
| 555 | // from: v_lshrrev_b16_e32 v1, 8, v0 |
| 556 | // to SDWA src:v0 src_sel:BYTE_1 |
| 557 | |
| 558 | // from: v_ashrrev_i16_e32 v1, 8, v0 |
| 559 | // to SDWA src:v0 src_sel:BYTE_1 sext:1 |
| 560 | |
| 561 | // from: v_lshlrev_b16_e32 v1, 8, v0 |
| 562 | // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD |
| 563 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 564 | auto Imm = foldToImm(*Src0); |
| 565 | if (!Imm || *Imm != 8) |
| 566 | break; |
| 567 | |
| 568 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 569 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 570 | |
| 571 | if (TRI->isPhysicalRegister(Src1->getReg()) || |
| 572 | TRI->isPhysicalRegister(Dst->getReg())) |
| 573 | break; |
| 574 | |
| 575 | if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || |
| 576 | Opcode == AMDGPU::V_LSHLREV_B16_e64) { |
| 577 | return make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD); |
| 578 | } else { |
| 579 | return make_unique<SDWASrcOperand>( |
| 580 | Src1, Dst, BYTE_1, false, false, |
| 581 | Opcode != AMDGPU::V_LSHRREV_B16_e32 && |
| 582 | Opcode != AMDGPU::V_LSHRREV_B16_e64); |
| 583 | } |
| 584 | break; |
| 585 | } |
| 586 | |
| 587 | case AMDGPU::V_BFE_I32: |
| 588 | case AMDGPU::V_BFE_U32: { |
| 589 | // e.g.: |
| 590 | // from: v_bfe_u32 v1, v0, 8, 8 |
| 591 | // to SDWA src:v0 src_sel:BYTE_1 |
| 592 | |
| 593 | // offset | width | src_sel |
| 594 | // ------------------------ |
| 595 | // 0 | 8 | BYTE_0 |
| 596 | // 0 | 16 | WORD_0 |
| 597 | // 0 | 32 | DWORD ? |
| 598 | // 8 | 8 | BYTE_1 |
| 599 | // 16 | 8 | BYTE_2 |
| 600 | // 16 | 16 | WORD_1 |
| 601 | // 24 | 8 | BYTE_3 |
| 602 | |
| 603 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 604 | auto Offset = foldToImm(*Src1); |
| 605 | if (!Offset) |
| 606 | break; |
| 607 | |
| 608 | MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); |
| 609 | auto Width = foldToImm(*Src2); |
| 610 | if (!Width) |
| 611 | break; |
| 612 | |
| 613 | SdwaSel SrcSel = DWORD; |
| 614 | |
| 615 | if (*Offset == 0 && *Width == 8) |
| 616 | SrcSel = BYTE_0; |
| 617 | else if (*Offset == 0 && *Width == 16) |
| 618 | SrcSel = WORD_0; |
| 619 | else if (*Offset == 0 && *Width == 32) |
| 620 | SrcSel = DWORD; |
| 621 | else if (*Offset == 8 && *Width == 8) |
| 622 | SrcSel = BYTE_1; |
| 623 | else if (*Offset == 16 && *Width == 8) |
| 624 | SrcSel = BYTE_2; |
| 625 | else if (*Offset == 16 && *Width == 16) |
| 626 | SrcSel = WORD_1; |
| 627 | else if (*Offset == 24 && *Width == 8) |
| 628 | SrcSel = BYTE_3; |
| 629 | else |
| 630 | break; |
| 631 | |
| 632 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 633 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 634 | |
| 635 | if (TRI->isPhysicalRegister(Src0->getReg()) || |
| 636 | TRI->isPhysicalRegister(Dst->getReg())) |
| 637 | break; |
| 638 | |
| 639 | return make_unique<SDWASrcOperand>( |
| 640 | Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32); |
| 641 | } |
| 642 | |
| 643 | case AMDGPU::V_AND_B32_e32: |
| 644 | case AMDGPU::V_AND_B32_e64: { |
| 645 | // e.g.: |
| 646 | // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 |
| 647 | // to SDWA src:v0 src_sel:WORD_0/BYTE_0 |
| 648 | |
| 649 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 650 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 651 | auto ValSrc = Src1; |
| 652 | auto Imm = foldToImm(*Src0); |
| 653 | |
| 654 | if (!Imm) { |
| 655 | Imm = foldToImm(*Src1); |
| 656 | ValSrc = Src0; |
| 657 | } |
| 658 | |
| 659 | if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) |
| 660 | break; |
| 661 | |
| 662 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 663 | |
| 664 | if (TRI->isPhysicalRegister(Src1->getReg()) || |
| 665 | TRI->isPhysicalRegister(Dst->getReg())) |
| 666 | break; |
| 667 | |
| 668 | return make_unique<SDWASrcOperand>( |
| 669 | ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); |
| 670 | } |
| 671 | |
| 672 | case AMDGPU::V_OR_B32_e32: |
| 673 | case AMDGPU::V_OR_B32_e64: { |
| 674 | // Patterns for dst_unused:UNUSED_PRESERVE. |
| 675 | // e.g., from: |
| 676 | // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD |
| 677 | // src1_sel:WORD_1 src2_sel:WORD1 |
| 678 | // v_add_f16_e32 v3, v1, v2 |
| 679 | // v_or_b32_e32 v4, v0, v3 |
| 680 | // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3 |
| 681 | |
| 682 | // Check if one of operands of v_or_b32 is SDWA instruction |
| 683 | using CheckRetType = Optional<std::pair<MachineOperand *, MachineOperand *>>; |
| 684 | auto CheckOROperandsForSDWA = |
| 685 | [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType { |
| 686 | if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg()) |
| 687 | return CheckRetType(None); |
| 688 | |
| 689 | MachineOperand *Op1Def = findSingleRegDef(Op1, MRI); |
| 690 | if (!Op1Def) |
| 691 | return CheckRetType(None); |
| 692 | |
| 693 | MachineInstr *Op1Inst = Op1Def->getParent(); |
| 694 | if (!TII->isSDWA(*Op1Inst)) |
| 695 | return CheckRetType(None); |
| 696 | |
| 697 | MachineOperand *Op2Def = findSingleRegDef(Op2, MRI); |
| 698 | if (!Op2Def) |
| 699 | return CheckRetType(None); |
| 700 | |
| 701 | return CheckRetType(std::make_pair(Op1Def, Op2Def)); |
| 702 | }; |
| 703 | |
| 704 | MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 705 | MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 706 | assert(OrSDWA && OrOther); |
| 707 | auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther); |
| 708 | if (!Res) { |
| 709 | OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 710 | OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 711 | assert(OrSDWA && OrOther); |
| 712 | Res = CheckOROperandsForSDWA(OrSDWA, OrOther); |
| 713 | if (!Res) |
| 714 | break; |
| 715 | } |
| 716 | |
| 717 | MachineOperand *OrSDWADef = Res->first; |
| 718 | MachineOperand *OrOtherDef = Res->second; |
| 719 | assert(OrSDWADef && OrOtherDef); |
| 720 | |
| 721 | MachineInstr *SDWAInst = OrSDWADef->getParent(); |
| 722 | MachineInstr *OtherInst = OrOtherDef->getParent(); |
| 723 | |
| 724 | // Check that OtherInstr is actually bitwise compatible with SDWAInst = their |
| 725 | // destination patterns don't overlap. Compatible instruction can be either |
| 726 | // regular instruction with compatible bitness or SDWA instruction with |
| 727 | // correct dst_sel |
| 728 | // SDWAInst | OtherInst bitness / OtherInst dst_sel |
| 729 | // ----------------------------------------------------- |
| 730 | // DWORD | no / no |
| 731 | // WORD_0 | no / BYTE_2/3, WORD_1 |
| 732 | // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0 |
| 733 | // BYTE_0 | no / BYTE_1/2/3, WORD_1 |
| 734 | // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1 |
| 735 | // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0 |
| 736 | // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0 |
| 737 | // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK |
| 738 | // but v_add_f32 is not. |
| 739 | |
| 740 | // TODO: add support for non-SDWA instructions as OtherInst. |
| 741 | // For now this only works with SDWA instructions. For regular instructions |
| 742 | // there is no way to determine if instruction write only 8/16/24-bit out of |
| 743 | // full register size and all registers are at min 32-bit wide. |
| 744 | if (!TII->isSDWA(*OtherInst)) |
| 745 | break; |
| 746 | |
| 747 | SdwaSel DstSel = static_cast<SdwaSel>( |
| 748 | TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));; |
| 749 | SdwaSel OtherDstSel = static_cast<SdwaSel>( |
| 750 | TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel)); |
| 751 | |
| 752 | bool DstSelAgree = false; |
| 753 | switch (DstSel) { |
| 754 | case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) || |
| 755 | (OtherDstSel == BYTE_3) || |
| 756 | (OtherDstSel == WORD_1)); |
| 757 | break; |
| 758 | case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 759 | (OtherDstSel == BYTE_1) || |
| 760 | (OtherDstSel == WORD_0)); |
| 761 | break; |
| 762 | case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) || |
| 763 | (OtherDstSel == BYTE_2) || |
| 764 | (OtherDstSel == BYTE_3) || |
| 765 | (OtherDstSel == WORD_1)); |
| 766 | break; |
| 767 | case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 768 | (OtherDstSel == BYTE_2) || |
| 769 | (OtherDstSel == BYTE_3) || |
| 770 | (OtherDstSel == WORD_1)); |
| 771 | break; |
| 772 | case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 773 | (OtherDstSel == BYTE_1) || |
| 774 | (OtherDstSel == BYTE_3) || |
| 775 | (OtherDstSel == WORD_0)); |
| 776 | break; |
| 777 | case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 778 | (OtherDstSel == BYTE_1) || |
| 779 | (OtherDstSel == BYTE_2) || |
| 780 | (OtherDstSel == WORD_0)); |
| 781 | break; |
| 782 | default: DstSelAgree = false; |
| 783 | } |
| 784 | |
| 785 | if (!DstSelAgree) |
| 786 | break; |
| 787 | |
| 788 | // Also OtherInst dst_unused should be UNUSED_PAD |
| 789 | DstUnused OtherDstUnused = static_cast<DstUnused>( |
| 790 | TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused)); |
| 791 | if (OtherDstUnused != DstUnused::UNUSED_PAD) |
| 792 | break; |
| 793 | |
| 794 | // Create DstPreserveOperand |
| 795 | MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 796 | assert(OrDst && OrDst->isReg()); |
| 797 | |
| 798 | return make_unique<SDWADstPreserveOperand>( |
| 799 | OrDst, OrSDWADef, OrOtherDef, DstSel); |
| 800 | |
| 801 | } |
| 802 | } |
| 803 | |
| 804 | return std::unique_ptr<SDWAOperand>(nullptr); |
| 805 | } |
| 806 | |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 807 | void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) { |
| 808 | for (MachineBasicBlock &MBB : MF) { |
| 809 | for (MachineInstr &MI : MBB) { |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 810 | if (auto Operand = matchSDWAOperand(MI)) { |
| 811 | DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n'); |
| 812 | SDWAOperands[&MI] = std::move(Operand); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 813 | ++NumSDWAPatternsFound; |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 814 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 815 | } |
| 816 | } |
| 817 | } |
| 818 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 819 | bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, |
| 820 | const SISubtarget &ST) const { |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 821 | // Check if this is already an SDWA instruction |
| 822 | unsigned Opc = MI.getOpcode(); |
| 823 | if (TII->isSDWA(Opc)) |
| 824 | return true; |
| 825 | |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 826 | // Check if this instruction has opcode that supports SDWA |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 827 | if (AMDGPU::getSDWAOp(Opc) == -1) |
| 828 | Opc = AMDGPU::getVOPe32(Opc); |
| 829 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 830 | if (AMDGPU::getSDWAOp(Opc) == -1) |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 831 | return false; |
| 832 | |
| 833 | if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) |
| 834 | return false; |
| 835 | |
| 836 | if (TII->isVOPC(Opc)) { |
| 837 | if (!ST.hasSDWASdst()) { |
| 838 | const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); |
| 839 | if (SDst && SDst->getReg() != AMDGPU::VCC) |
| 840 | return false; |
| 841 | } |
| 842 | |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 843 | if (!ST.hasSDWAOutModsVOPC() && |
| 844 | (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) || |
| 845 | TII->hasModifiersSet(MI, AMDGPU::OpName::omod))) |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 846 | return false; |
| 847 | |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 848 | } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) || |
| 849 | !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 850 | return false; |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 851 | } |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 852 | |
| 853 | if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 || |
| 854 | Opc == AMDGPU::V_MAC_F32_e32)) |
| 855 | return false; |
| 856 | |
| 857 | return true; |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 858 | } |
| 859 | |
| 860 | bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, |
| 861 | const SDWAOperandsVector &SDWAOperands) { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 862 | // Convert to sdwa |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 863 | int SDWAOpcode; |
| 864 | unsigned Opcode = MI.getOpcode(); |
| 865 | if (TII->isSDWA(Opcode)) { |
| 866 | SDWAOpcode = Opcode; |
| 867 | } else { |
| 868 | SDWAOpcode = AMDGPU::getSDWAOp(Opcode); |
| 869 | if (SDWAOpcode == -1) |
| 870 | SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode)); |
| 871 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 872 | assert(SDWAOpcode != -1); |
| 873 | |
| 874 | const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); |
| 875 | |
| 876 | // Create SDWA version of instruction MI and initialize its operands |
| 877 | MachineInstrBuilder SDWAInst = |
| 878 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); |
| 879 | |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 880 | // Copy dst, if it is present in original then should also be present in SDWA |
| 881 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 882 | if (Dst) { |
| 883 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); |
| 884 | SDWAInst.add(*Dst); |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 885 | } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 886 | assert(Dst && |
| 887 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); |
| 888 | SDWAInst.add(*Dst); |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 889 | } else { |
| 890 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); |
| 891 | SDWAInst.addReg(AMDGPU::VCC, RegState::Define); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 892 | } |
| 893 | |
| 894 | // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and |
| 895 | // src0_modifiers (except for v_nop_sdwa, but it can't get here) |
| 896 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 897 | assert( |
| 898 | Src0 && |
| 899 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && |
| 900 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 901 | if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) |
| 902 | SDWAInst.addImm(Mod->getImm()); |
| 903 | else |
| 904 | SDWAInst.addImm(0); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 905 | SDWAInst.add(*Src0); |
| 906 | |
| 907 | // Copy src1 if present, initialize src1_modifiers. |
| 908 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 909 | if (Src1) { |
| 910 | assert( |
| 911 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && |
| 912 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 913 | if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) |
| 914 | SDWAInst.addImm(Mod->getImm()); |
| 915 | else |
| 916 | SDWAInst.addImm(0); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 917 | SDWAInst.add(*Src1); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 918 | } |
| 919 | |
| 920 | if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || |
| 921 | SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) { |
| 922 | // v_mac_f16/32 has additional src2 operand tied to vdst |
| 923 | MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); |
| 924 | assert(Src2); |
| 925 | SDWAInst.add(*Src2); |
| 926 | } |
| 927 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 928 | // Copy clamp if present, initialize otherwise |
| 929 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); |
| 930 | MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp); |
| 931 | if (Clamp) { |
| 932 | SDWAInst.add(*Clamp); |
| 933 | } else { |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 934 | SDWAInst.addImm(0); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 935 | } |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 936 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 937 | // Copy omod if present, initialize otherwise if needed |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 938 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { |
| 939 | MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); |
| 940 | if (OMod) { |
| 941 | SDWAInst.add(*OMod); |
| 942 | } else { |
| 943 | SDWAInst.addImm(0); |
| 944 | } |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 945 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 946 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 947 | // Copy dst_sel if present, initialize otherwise if needed |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 948 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) { |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 949 | MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); |
| 950 | if (DstSel) { |
| 951 | SDWAInst.add(*DstSel); |
| 952 | } else { |
| 953 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); |
| 954 | } |
| 955 | } |
| 956 | |
| 957 | // Copy dst_unused if present, initialize otherwise if needed |
| 958 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { |
| 959 | MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
| 960 | if (DstUnused) { |
| 961 | SDWAInst.add(*DstUnused); |
| 962 | } else { |
| 963 | SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); |
| 964 | } |
| 965 | } |
| 966 | |
| 967 | // Copy src0_sel if present, initialize otherwise |
| 968 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); |
| 969 | MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); |
| 970 | if (Src0Sel) { |
| 971 | SDWAInst.add(*Src0Sel); |
| 972 | } else { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 973 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 974 | } |
| 975 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 976 | // Copy src1_sel if present, initialize otherwise if needed |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 977 | if (Src1) { |
| 978 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 979 | MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); |
| 980 | if (Src1Sel) { |
| 981 | SDWAInst.add(*Src1Sel); |
| 982 | } else { |
| 983 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); |
| 984 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 985 | } |
| 986 | |
| 987 | // Apply all sdwa operand pattenrs |
| 988 | bool Converted = false; |
| 989 | for (auto &Operand : SDWAOperands) { |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 990 | // There should be no intesection between SDWA operands and potential MIs |
| 991 | // e.g.: |
| 992 | // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 |
| 993 | // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0 |
| 994 | // v_add_u32 v3, v4, v2 |
| 995 | // |
| 996 | // In that example it is possible that we would fold 2nd instruction into 3rd |
| 997 | // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was |
| 998 | // already destroyed). So if SDWAOperand is also a potential MI then do not |
| 999 | // apply it. |
| 1000 | if (PotentialMatches.count(Operand->getParentInst()) == 0) |
| 1001 | Converted |= Operand->convertToSDWA(*SDWAInst, TII); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1002 | } |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1003 | if (Converted) { |
| 1004 | ConvertedInstructions.push_back(SDWAInst); |
| 1005 | } else { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1006 | SDWAInst->eraseFromParent(); |
| 1007 | return false; |
| 1008 | } |
| 1009 | |
| 1010 | DEBUG(dbgs() << "Convert instruction:" << MI |
| 1011 | << "Into:" << *SDWAInst << '\n'); |
| 1012 | ++NumSDWAInstructionsPeepholed; |
| 1013 | |
| 1014 | MI.eraseFromParent(); |
| 1015 | return true; |
| 1016 | } |
| 1017 | |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1018 | // If an instruction was converted to SDWA it should not have immediates or SGPR |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1019 | // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs. |
| 1020 | void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const { |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1021 | const MCInstrDesc &Desc = TII->get(MI.getOpcode()); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1022 | unsigned ConstantBusCount = 0; |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1023 | for (MachineOperand &Op : MI.explicit_uses()) { |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1024 | if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) |
| 1025 | continue; |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1026 | |
| 1027 | unsigned I = MI.getOperandNo(&Op); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1028 | if (Desc.OpInfo[I].RegClass == -1 || |
| 1029 | !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) |
| 1030 | continue; |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1031 | |
| 1032 | if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() && |
| 1033 | TRI->isSGPRReg(*MRI, Op.getReg())) { |
| 1034 | ++ConstantBusCount; |
| 1035 | continue; |
| 1036 | } |
| 1037 | |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1038 | unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
| 1039 | auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), |
| 1040 | TII->get(AMDGPU::V_MOV_B32_e32), VGPR); |
| 1041 | if (Op.isImm()) |
| 1042 | Copy.addImm(Op.getImm()); |
| 1043 | else if (Op.isReg()) |
| 1044 | Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0, |
| 1045 | Op.getSubReg()); |
| 1046 | Op.ChangeToRegister(VGPR, false); |
| 1047 | } |
| 1048 | } |
| 1049 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1050 | bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { |
| 1051 | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); |
| 1052 | |
Matt Arsenault | f42074b | 2017-10-10 20:48:36 +0000 | [diff] [blame] | 1053 | if (!ST.hasSDWA() || skipFunction(*MF.getFunction())) |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1054 | return false; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1055 | |
| 1056 | MRI = &MF.getRegInfo(); |
| 1057 | TRI = ST.getRegisterInfo(); |
| 1058 | TII = ST.getInstrInfo(); |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 1059 | |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 1060 | // Find all SDWA operands in MF. |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1061 | bool Changed = false; |
| 1062 | bool Ret = false; |
| 1063 | do { |
| 1064 | matchSDWAOperands(MF); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1065 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1066 | for (const auto &OperandPair : SDWAOperands) { |
| 1067 | const auto &Operand = OperandPair.second; |
| 1068 | MachineInstr *PotentialMI = Operand->potentialToConvert(TII); |
| 1069 | if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { |
| 1070 | PotentialMatches[PotentialMI].push_back(Operand.get()); |
| 1071 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1072 | } |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 1073 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1074 | for (auto &PotentialPair : PotentialMatches) { |
| 1075 | MachineInstr &PotentialMI = *PotentialPair.first; |
| 1076 | convertToSDWA(PotentialMI, PotentialPair.second); |
| 1077 | } |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 1078 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1079 | PotentialMatches.clear(); |
| 1080 | SDWAOperands.clear(); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1081 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1082 | Changed = !ConvertedInstructions.empty(); |
| 1083 | |
| 1084 | if (Changed) |
| 1085 | Ret = true; |
| 1086 | |
| 1087 | while (!ConvertedInstructions.empty()) |
| 1088 | legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST); |
| 1089 | } while (Changed); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1090 | |
Stanislav Mekhanoshin | e4cda74 | 2017-06-06 16:42:30 +0000 | [diff] [blame] | 1091 | return Ret; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1092 | } |