Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 1 | //===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===// |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | /// \file This pass tries to apply several peephole SDWA patterns. |
| 11 | /// |
| 12 | /// E.g. original: |
Francis Visoiu Mistrih | 93ef145 | 2017-11-30 12:12:19 +0000 | [diff] [blame] | 13 | /// V_LSHRREV_B32_e32 %0, 16, %1 |
| 14 | /// V_ADD_I32_e32 %2, %0, %3 |
| 15 | /// V_LSHLREV_B32_e32 %4, 16, %2 |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 16 | /// |
| 17 | /// Replace: |
Francis Visoiu Mistrih | 93ef145 | 2017-11-30 12:12:19 +0000 | [diff] [blame] | 18 | /// V_ADD_I32_sdwa %4, %1, %3 |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 19 | /// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD |
| 20 | /// |
| 21 | //===----------------------------------------------------------------------===// |
| 22 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 23 | #include "AMDGPU.h" |
| 24 | #include "AMDGPUSubtarget.h" |
| 25 | #include "SIDefines.h" |
| 26 | #include "SIInstrInfo.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 27 | #include "SIRegisterInfo.h" |
| 28 | #include "Utils/AMDGPUBaseInfo.h" |
| 29 | #include "llvm/ADT/None.h" |
| 30 | #include "llvm/ADT/Optional.h" |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 31 | #include "llvm/ADT/STLExtras.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 32 | #include "llvm/ADT/SmallVector.h" |
Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 33 | #include "llvm/ADT/Statistic.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 34 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 35 | #include "llvm/CodeGen/MachineFunction.h" |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 36 | #include "llvm/CodeGen/MachineFunctionPass.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 37 | #include "llvm/CodeGen/MachineInstr.h" |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 38 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 39 | #include "llvm/CodeGen/MachineOperand.h" |
| 40 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
David Blaikie | b3bde2e | 2017-11-17 01:07:10 +0000 | [diff] [blame] | 41 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
Nico Weber | 432a388 | 2018-04-30 14:59:11 +0000 | [diff] [blame] | 42 | #include "llvm/Config/llvm-config.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 43 | #include "llvm/MC/LaneBitmask.h" |
| 44 | #include "llvm/MC/MCInstrDesc.h" |
| 45 | #include "llvm/Pass.h" |
| 46 | #include "llvm/Support/Debug.h" |
| 47 | #include "llvm/Support/raw_ostream.h" |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 48 | #include <algorithm> |
| 49 | #include <cassert> |
| 50 | #include <cstdint> |
| 51 | #include <memory> |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 52 | #include <unordered_map> |
| 53 | |
| 54 | using namespace llvm; |
| 55 | |
| 56 | #define DEBUG_TYPE "si-peephole-sdwa" |
| 57 | |
| 58 | STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found."); |
| 59 | STATISTIC(NumSDWAInstructionsPeepholed, |
| 60 | "Number of instruction converted to SDWA."); |
| 61 | |
| 62 | namespace { |
| 63 | |
| 64 | class SDWAOperand; |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 65 | class SDWADstOperand; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 66 | |
| 67 | class SIPeepholeSDWA : public MachineFunctionPass { |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 68 | public: |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 69 | using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>; |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 70 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 71 | private: |
| 72 | MachineRegisterInfo *MRI; |
| 73 | const SIRegisterInfo *TRI; |
| 74 | const SIInstrInfo *TII; |
| 75 | |
| 76 | std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands; |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 77 | std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches; |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 78 | SmallVector<MachineInstr *, 8> ConvertedInstructions; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 79 | |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 80 | Optional<int64_t> foldToImm(const MachineOperand &Op) const; |
| 81 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 82 | public: |
| 83 | static char ID; |
| 84 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 85 | SIPeepholeSDWA() : MachineFunctionPass(ID) { |
| 86 | initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry()); |
| 87 | } |
| 88 | |
| 89 | bool runOnMachineFunction(MachineFunction &MF) override; |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 90 | void matchSDWAOperands(MachineBasicBlock &MBB); |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 91 | std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 92 | bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 93 | bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 94 | void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 95 | |
| 96 | StringRef getPassName() const override { return "SI Peephole SDWA"; } |
| 97 | |
| 98 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 99 | AU.setPreservesCFG(); |
| 100 | MachineFunctionPass::getAnalysisUsage(AU); |
| 101 | } |
| 102 | }; |
| 103 | |
| 104 | class SDWAOperand { |
| 105 | private: |
| 106 | MachineOperand *Target; // Operand that would be used in converted instruction |
| 107 | MachineOperand *Replaced; // Operand that would be replace by Target |
| 108 | |
| 109 | public: |
| 110 | SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp) |
| 111 | : Target(TargetOp), Replaced(ReplacedOp) { |
| 112 | assert(Target->isReg()); |
| 113 | assert(Replaced->isReg()); |
| 114 | } |
| 115 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 116 | virtual ~SDWAOperand() = default; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 117 | |
| 118 | virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0; |
| 119 | virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0; |
| 120 | |
| 121 | MachineOperand *getTargetOperand() const { return Target; } |
| 122 | MachineOperand *getReplacedOperand() const { return Replaced; } |
| 123 | MachineInstr *getParentInst() const { return Target->getParent(); } |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 124 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 125 | MachineRegisterInfo *getMRI() const { |
| 126 | return &getParentInst()->getParent()->getParent()->getRegInfo(); |
| 127 | } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 128 | |
| 129 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 130 | virtual void print(raw_ostream& OS) const = 0; |
| 131 | void dump() const { print(dbgs()); } |
| 132 | #endif |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 133 | }; |
| 134 | |
| 135 | using namespace AMDGPU::SDWA; |
| 136 | |
| 137 | class SDWASrcOperand : public SDWAOperand { |
| 138 | private: |
| 139 | SdwaSel SrcSel; |
| 140 | bool Abs; |
| 141 | bool Neg; |
| 142 | bool Sext; |
| 143 | |
| 144 | public: |
| 145 | SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, |
| 146 | SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false, |
| 147 | bool Sext_ = false) |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 148 | : SDWAOperand(TargetOp, ReplacedOp), |
| 149 | SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {} |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 150 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 151 | MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; |
| 152 | bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 153 | |
| 154 | SdwaSel getSrcSel() const { return SrcSel; } |
| 155 | bool getAbs() const { return Abs; } |
| 156 | bool getNeg() const { return Neg; } |
| 157 | bool getSext() const { return Sext; } |
| 158 | |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 159 | uint64_t getSrcMods(const SIInstrInfo *TII, |
| 160 | const MachineOperand *SrcOp) const; |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 161 | |
| 162 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 163 | void print(raw_ostream& OS) const override; |
| 164 | #endif |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 165 | }; |
| 166 | |
| 167 | class SDWADstOperand : public SDWAOperand { |
| 168 | private: |
| 169 | SdwaSel DstSel; |
| 170 | DstUnused DstUn; |
| 171 | |
| 172 | public: |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 173 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 174 | SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, |
| 175 | SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 176 | : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 177 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 178 | MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; |
| 179 | bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 180 | |
| 181 | SdwaSel getDstSel() const { return DstSel; } |
| 182 | DstUnused getDstUnused() const { return DstUn; } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 183 | |
| 184 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 185 | void print(raw_ostream& OS) const override; |
| 186 | #endif |
| 187 | }; |
| 188 | |
| 189 | class SDWADstPreserveOperand : public SDWADstOperand { |
| 190 | private: |
| 191 | MachineOperand *Preserve; |
| 192 | |
| 193 | public: |
| 194 | SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp, |
| 195 | MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD) |
| 196 | : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE), |
| 197 | Preserve(PreserveOp) {} |
| 198 | |
| 199 | bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; |
| 200 | |
| 201 | MachineOperand *getPreservedOperand() const { return Preserve; } |
| 202 | |
| 203 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| 204 | void print(raw_ostream& OS) const override; |
| 205 | #endif |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 206 | }; |
| 207 | |
Eugene Zelenko | 59e1282 | 2017-08-08 00:47:13 +0000 | [diff] [blame] | 208 | } // end anonymous namespace |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 209 | |
| 210 | INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false) |
| 211 | |
| 212 | char SIPeepholeSDWA::ID = 0; |
| 213 | |
| 214 | char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID; |
| 215 | |
| 216 | FunctionPass *llvm::createSIPeepholeSDWAPass() { |
| 217 | return new SIPeepholeSDWA(); |
| 218 | } |
| 219 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 220 | |
| 221 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
Matt Arsenault | c24d5e2 | 2018-02-08 22:46:38 +0000 | [diff] [blame] | 222 | static raw_ostream& operator<<(raw_ostream &OS, SdwaSel Sel) { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 223 | switch(Sel) { |
| 224 | case BYTE_0: OS << "BYTE_0"; break; |
| 225 | case BYTE_1: OS << "BYTE_1"; break; |
| 226 | case BYTE_2: OS << "BYTE_2"; break; |
| 227 | case BYTE_3: OS << "BYTE_3"; break; |
| 228 | case WORD_0: OS << "WORD_0"; break; |
| 229 | case WORD_1: OS << "WORD_1"; break; |
| 230 | case DWORD: OS << "DWORD"; break; |
| 231 | } |
| 232 | return OS; |
| 233 | } |
| 234 | |
| 235 | static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) { |
| 236 | switch(Un) { |
| 237 | case UNUSED_PAD: OS << "UNUSED_PAD"; break; |
| 238 | case UNUSED_SEXT: OS << "UNUSED_SEXT"; break; |
| 239 | case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break; |
| 240 | } |
| 241 | return OS; |
| 242 | } |
| 243 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 244 | static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) { |
| 245 | Operand.print(OS); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 246 | return OS; |
| 247 | } |
| 248 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 249 | LLVM_DUMP_METHOD |
| 250 | void SDWASrcOperand::print(raw_ostream& OS) const { |
| 251 | OS << "SDWA src: " << *getTargetOperand() |
| 252 | << " src_sel:" << getSrcSel() |
| 253 | << " abs:" << getAbs() << " neg:" << getNeg() |
| 254 | << " sext:" << getSext() << '\n'; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 255 | } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 256 | |
| 257 | LLVM_DUMP_METHOD |
| 258 | void SDWADstOperand::print(raw_ostream& OS) const { |
| 259 | OS << "SDWA dst: " << *getTargetOperand() |
| 260 | << " dst_sel:" << getDstSel() |
| 261 | << " dst_unused:" << getDstUnused() << '\n'; |
| 262 | } |
| 263 | |
| 264 | LLVM_DUMP_METHOD |
| 265 | void SDWADstPreserveOperand::print(raw_ostream& OS) const { |
| 266 | OS << "SDWA preserve dst: " << *getTargetOperand() |
| 267 | << " dst_sel:" << getDstSel() |
| 268 | << " preserve:" << *getPreservedOperand() << '\n'; |
| 269 | } |
| 270 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 271 | #endif |
| 272 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 273 | static void copyRegOperand(MachineOperand &To, const MachineOperand &From) { |
| 274 | assert(To.isReg() && From.isReg()); |
| 275 | To.setReg(From.getReg()); |
| 276 | To.setSubReg(From.getSubReg()); |
| 277 | To.setIsUndef(From.isUndef()); |
| 278 | if (To.isUse()) { |
| 279 | To.setIsKill(From.isKill()); |
| 280 | } else { |
| 281 | To.setIsDead(From.isDead()); |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) { |
| 286 | return LHS.isReg() && |
| 287 | RHS.isReg() && |
| 288 | LHS.getReg() == RHS.getReg() && |
| 289 | LHS.getSubReg() == RHS.getSubReg(); |
| 290 | } |
| 291 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 292 | static MachineOperand *findSingleRegUse(const MachineOperand *Reg, |
| 293 | const MachineRegisterInfo *MRI) { |
| 294 | if (!Reg->isReg() || !Reg->isDef()) |
| 295 | return nullptr; |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 296 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 297 | MachineOperand *ResMO = nullptr; |
| 298 | for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) { |
| 299 | // If there exist use of subreg of Reg then return nullptr |
| 300 | if (!isSameReg(UseMO, *Reg)) |
| 301 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 302 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 303 | // Check that there is only one instruction that uses Reg |
| 304 | if (!ResMO) { |
| 305 | ResMO = &UseMO; |
| 306 | } else if (ResMO->getParent() != UseMO.getParent()) { |
| 307 | return nullptr; |
| 308 | } |
| 309 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 310 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 311 | return ResMO; |
| 312 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 313 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 314 | static MachineOperand *findSingleRegDef(const MachineOperand *Reg, |
| 315 | const MachineRegisterInfo *MRI) { |
| 316 | if (!Reg->isReg()) |
| 317 | return nullptr; |
| 318 | |
| 319 | MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg()); |
| 320 | if (!DefInstr) |
| 321 | return nullptr; |
| 322 | |
| 323 | for (auto &DefMO : DefInstr->defs()) { |
| 324 | if (DefMO.isReg() && DefMO.getReg() == Reg->getReg()) |
| 325 | return &DefMO; |
| 326 | } |
| 327 | |
Matt Arsenault | 8ae38bc | 2017-12-05 20:32:01 +0000 | [diff] [blame] | 328 | // Ignore implicit defs. |
| 329 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 330 | } |
| 331 | |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 332 | uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, |
| 333 | const MachineOperand *SrcOp) const { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 334 | uint64_t Mods = 0; |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 335 | const auto *MI = SrcOp->getParent(); |
| 336 | if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) { |
| 337 | if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { |
| 338 | Mods = Mod->getImm(); |
| 339 | } |
| 340 | } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) { |
| 341 | if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) { |
| 342 | Mods = Mod->getImm(); |
| 343 | } |
| 344 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 345 | if (Abs || Neg) { |
| 346 | assert(!Sext && |
| 347 | "Float and integer src modifiers can't be set simulteniously"); |
| 348 | Mods |= Abs ? SISrcMods::ABS : 0; |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 349 | Mods ^= Neg ? SISrcMods::NEG : 0; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 350 | } else if (Sext) { |
| 351 | Mods |= SISrcMods::SEXT; |
| 352 | } |
| 353 | |
| 354 | return Mods; |
| 355 | } |
| 356 | |
| 357 | MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) { |
| 358 | // For SDWA src operand potential instruction is one that use register |
| 359 | // defined by parent instruction |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 360 | MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI()); |
| 361 | if (!PotentialMO) |
| 362 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 363 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 364 | return PotentialMO->getParent(); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 365 | } |
| 366 | |
| 367 | bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { |
| 368 | // Find operand in instruction that matches source operand and replace it with |
| 369 | // target operand. Set corresponding src_sel |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 370 | bool IsPreserveSrc = false; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 371 | MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 372 | MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); |
| 373 | MachineOperand *SrcMods = |
| 374 | TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 375 | assert(Src && (Src->isReg() || Src->isImm())); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 376 | if (!isSameReg(*Src, *getReplacedOperand())) { |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 377 | // If this is not src0 then it could be src1 |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 378 | Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 379 | SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); |
| 380 | SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); |
| 381 | |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 382 | if (!Src || |
| 383 | !isSameReg(*Src, *getReplacedOperand())) { |
| 384 | // It's possible this Src is a tied operand for |
| 385 | // UNUSED_PRESERVE, in which case we can either |
| 386 | // abandon the peephole attempt, or if legal we can |
| 387 | // copy the target operand into the tied slot |
| 388 | // if the preserve operation will effectively cause the same |
| 389 | // result by overwriting the rest of the dst. |
| 390 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 391 | MachineOperand *DstUnused = |
| 392 | TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
| 393 | |
| 394 | if (Dst && |
| 395 | DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) { |
| 396 | // This will work if the tied src is acessing WORD_0, and the dst is |
| 397 | // writing WORD_1. Modifiers don't matter because all the bits that |
| 398 | // would be impacted are being overwritten by the dst. |
| 399 | // Any other case will not work. |
| 400 | SdwaSel DstSel = static_cast<SdwaSel>( |
| 401 | TII->getNamedImmOperand(MI, AMDGPU::OpName::dst_sel)); |
| 402 | if (DstSel == AMDGPU::SDWA::SdwaSel::WORD_1 && |
| 403 | getSrcSel() == AMDGPU::SDWA::SdwaSel::WORD_0) { |
| 404 | IsPreserveSrc = true; |
| 405 | auto DstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), |
| 406 | AMDGPU::OpName::vdst); |
| 407 | auto TiedIdx = MI.findTiedOperandIdx(DstIdx); |
| 408 | Src = &MI.getOperand(TiedIdx); |
| 409 | SrcSel = nullptr; |
| 410 | SrcMods = nullptr; |
| 411 | } else { |
| 412 | // Not legal to convert this src |
| 413 | return false; |
| 414 | } |
| 415 | } |
| 416 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 417 | assert(Src && Src->isReg()); |
| 418 | |
| 419 | if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || |
| 420 | MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 421 | !isSameReg(*Src, *getReplacedOperand())) { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 422 | // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to |
| 423 | // src2. This is not allowed. |
| 424 | return false; |
| 425 | } |
| 426 | |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 427 | assert(isSameReg(*Src, *getReplacedOperand()) && |
| 428 | (IsPreserveSrc || (SrcSel && SrcMods))); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 429 | } |
| 430 | copyRegOperand(*Src, *getTargetOperand()); |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 431 | if (!IsPreserveSrc) { |
| 432 | SrcSel->setImm(getSrcSel()); |
| 433 | SrcMods->setImm(getSrcMods(TII, Src)); |
| 434 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 435 | getTargetOperand()->setIsKill(false); |
| 436 | return true; |
| 437 | } |
| 438 | |
| 439 | MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) { |
| 440 | // For SDWA dst operand potential instruction is one that defines register |
| 441 | // that this operand uses |
| 442 | MachineRegisterInfo *MRI = getMRI(); |
| 443 | MachineInstr *ParentMI = getParentInst(); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 444 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 445 | MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI); |
| 446 | if (!PotentialMO) |
| 447 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 448 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 449 | // Check that ParentMI is the only instruction that uses replaced register |
| 450 | for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) { |
| 451 | if (&UseInst != ParentMI) |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 452 | return nullptr; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 453 | } |
| 454 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 455 | return PotentialMO->getParent(); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 456 | } |
| 457 | |
| 458 | bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { |
| 459 | // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused |
| 460 | |
| 461 | if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa || |
| 462 | MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) && |
| 463 | getDstSel() != AMDGPU::SDWA::DWORD) { |
| 464 | // v_mac_f16/32_sdwa allow dst_sel to be equal only to DWORD |
| 465 | return false; |
| 466 | } |
| 467 | |
| 468 | MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 469 | assert(Operand && |
| 470 | Operand->isReg() && |
| 471 | isSameReg(*Operand, *getReplacedOperand())); |
| 472 | copyRegOperand(*Operand, *getTargetOperand()); |
| 473 | MachineOperand *DstSel= TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); |
| 474 | assert(DstSel); |
| 475 | DstSel->setImm(getDstSel()); |
| 476 | MachineOperand *DstUnused= TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
| 477 | assert(DstUnused); |
| 478 | DstUnused->setImm(getDstUnused()); |
| 479 | |
| 480 | // Remove original instruction because it would conflict with our new |
| 481 | // instruction by register definition |
| 482 | getParentInst()->eraseFromParent(); |
| 483 | return true; |
| 484 | } |
| 485 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 486 | bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI, |
| 487 | const SIInstrInfo *TII) { |
| 488 | // MI should be moved right before v_or_b32. |
| 489 | // For this we should clear all kill flags on uses of MI src-operands or else |
| 490 | // we can encounter problem with use of killed operand. |
| 491 | for (MachineOperand &MO : MI.uses()) { |
| 492 | if (!MO.isReg()) |
| 493 | continue; |
| 494 | getMRI()->clearKillFlags(MO.getReg()); |
| 495 | } |
| 496 | |
| 497 | // Move MI before v_or_b32 |
| 498 | auto MBB = MI.getParent(); |
| 499 | MBB->remove(&MI); |
| 500 | MBB->insert(getParentInst(), &MI); |
| 501 | |
| 502 | // Add Implicit use of preserved register |
| 503 | MachineInstrBuilder MIB(*MBB->getParent(), MI); |
| 504 | MIB.addReg(getPreservedOperand()->getReg(), |
| 505 | RegState::ImplicitKill, |
| 506 | getPreservedOperand()->getSubReg()); |
| 507 | |
| 508 | // Tie dst to implicit use |
| 509 | MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst), |
| 510 | MI.getNumOperands() - 1); |
| 511 | |
| 512 | // Convert MI as any other SDWADstOperand and remove v_or_b32 |
| 513 | return SDWADstOperand::convertToSDWA(MI, TII); |
| 514 | } |
| 515 | |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 516 | Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const { |
| 517 | if (Op.isImm()) { |
| 518 | return Op.getImm(); |
| 519 | } |
| 520 | |
| 521 | // If this is not immediate then it can be copy of immediate value, e.g.: |
Francis Visoiu Mistrih | a8a83d1 | 2017-12-07 10:40:31 +0000 | [diff] [blame] | 522 | // %1 = S_MOV_B32 255; |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 523 | if (Op.isReg()) { |
| 524 | for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) { |
| 525 | if (!isSameReg(Op, Def)) |
| 526 | continue; |
| 527 | |
| 528 | const MachineInstr *DefInst = Def.getParent(); |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 529 | if (!TII->isFoldableCopy(*DefInst)) |
Sam Kolton | 27e0f8b | 2017-03-31 11:42:43 +0000 | [diff] [blame] | 530 | return None; |
| 531 | |
| 532 | const MachineOperand &Copied = DefInst->getOperand(1); |
| 533 | if (!Copied.isImm()) |
| 534 | return None; |
| 535 | |
| 536 | return Copied.getImm(); |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | return None; |
| 541 | } |
| 542 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 543 | std::unique_ptr<SDWAOperand> |
| 544 | SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) { |
| 545 | unsigned Opcode = MI.getOpcode(); |
| 546 | switch (Opcode) { |
| 547 | case AMDGPU::V_LSHRREV_B32_e32: |
| 548 | case AMDGPU::V_ASHRREV_I32_e32: |
| 549 | case AMDGPU::V_LSHLREV_B32_e32: |
| 550 | case AMDGPU::V_LSHRREV_B32_e64: |
| 551 | case AMDGPU::V_ASHRREV_I32_e64: |
| 552 | case AMDGPU::V_LSHLREV_B32_e64: { |
| 553 | // from: v_lshrrev_b32_e32 v1, 16/24, v0 |
| 554 | // to SDWA src:v0 src_sel:WORD_1/BYTE_3 |
| 555 | |
| 556 | // from: v_ashrrev_i32_e32 v1, 16/24, v0 |
| 557 | // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1 |
| 558 | |
| 559 | // from: v_lshlrev_b32_e32 v1, 16/24, v0 |
| 560 | // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD |
| 561 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 562 | auto Imm = foldToImm(*Src0); |
| 563 | if (!Imm) |
| 564 | break; |
| 565 | |
| 566 | if (*Imm != 16 && *Imm != 24) |
| 567 | break; |
| 568 | |
| 569 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 570 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 571 | if (TRI->isPhysicalRegister(Src1->getReg()) || |
| 572 | TRI->isPhysicalRegister(Dst->getReg())) |
| 573 | break; |
| 574 | |
| 575 | if (Opcode == AMDGPU::V_LSHLREV_B32_e32 || |
| 576 | Opcode == AMDGPU::V_LSHLREV_B32_e64) { |
| 577 | return make_unique<SDWADstOperand>( |
| 578 | Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD); |
| 579 | } else { |
| 580 | return make_unique<SDWASrcOperand>( |
| 581 | Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false, |
| 582 | Opcode != AMDGPU::V_LSHRREV_B32_e32 && |
| 583 | Opcode != AMDGPU::V_LSHRREV_B32_e64); |
| 584 | } |
| 585 | break; |
| 586 | } |
| 587 | |
| 588 | case AMDGPU::V_LSHRREV_B16_e32: |
| 589 | case AMDGPU::V_ASHRREV_I16_e32: |
| 590 | case AMDGPU::V_LSHLREV_B16_e32: |
| 591 | case AMDGPU::V_LSHRREV_B16_e64: |
| 592 | case AMDGPU::V_ASHRREV_I16_e64: |
| 593 | case AMDGPU::V_LSHLREV_B16_e64: { |
| 594 | // from: v_lshrrev_b16_e32 v1, 8, v0 |
| 595 | // to SDWA src:v0 src_sel:BYTE_1 |
| 596 | |
| 597 | // from: v_ashrrev_i16_e32 v1, 8, v0 |
| 598 | // to SDWA src:v0 src_sel:BYTE_1 sext:1 |
| 599 | |
| 600 | // from: v_lshlrev_b16_e32 v1, 8, v0 |
| 601 | // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD |
| 602 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 603 | auto Imm = foldToImm(*Src0); |
| 604 | if (!Imm || *Imm != 8) |
| 605 | break; |
| 606 | |
| 607 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 608 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 609 | |
| 610 | if (TRI->isPhysicalRegister(Src1->getReg()) || |
| 611 | TRI->isPhysicalRegister(Dst->getReg())) |
| 612 | break; |
| 613 | |
| 614 | if (Opcode == AMDGPU::V_LSHLREV_B16_e32 || |
| 615 | Opcode == AMDGPU::V_LSHLREV_B16_e64) { |
| 616 | return make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD); |
| 617 | } else { |
| 618 | return make_unique<SDWASrcOperand>( |
| 619 | Src1, Dst, BYTE_1, false, false, |
| 620 | Opcode != AMDGPU::V_LSHRREV_B16_e32 && |
| 621 | Opcode != AMDGPU::V_LSHRREV_B16_e64); |
| 622 | } |
| 623 | break; |
| 624 | } |
| 625 | |
| 626 | case AMDGPU::V_BFE_I32: |
| 627 | case AMDGPU::V_BFE_U32: { |
| 628 | // e.g.: |
| 629 | // from: v_bfe_u32 v1, v0, 8, 8 |
| 630 | // to SDWA src:v0 src_sel:BYTE_1 |
| 631 | |
| 632 | // offset | width | src_sel |
| 633 | // ------------------------ |
| 634 | // 0 | 8 | BYTE_0 |
| 635 | // 0 | 16 | WORD_0 |
| 636 | // 0 | 32 | DWORD ? |
| 637 | // 8 | 8 | BYTE_1 |
| 638 | // 16 | 8 | BYTE_2 |
| 639 | // 16 | 16 | WORD_1 |
| 640 | // 24 | 8 | BYTE_3 |
| 641 | |
| 642 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 643 | auto Offset = foldToImm(*Src1); |
| 644 | if (!Offset) |
| 645 | break; |
| 646 | |
| 647 | MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); |
| 648 | auto Width = foldToImm(*Src2); |
| 649 | if (!Width) |
| 650 | break; |
| 651 | |
| 652 | SdwaSel SrcSel = DWORD; |
| 653 | |
| 654 | if (*Offset == 0 && *Width == 8) |
| 655 | SrcSel = BYTE_0; |
| 656 | else if (*Offset == 0 && *Width == 16) |
| 657 | SrcSel = WORD_0; |
| 658 | else if (*Offset == 0 && *Width == 32) |
| 659 | SrcSel = DWORD; |
| 660 | else if (*Offset == 8 && *Width == 8) |
| 661 | SrcSel = BYTE_1; |
| 662 | else if (*Offset == 16 && *Width == 8) |
| 663 | SrcSel = BYTE_2; |
| 664 | else if (*Offset == 16 && *Width == 16) |
| 665 | SrcSel = WORD_1; |
| 666 | else if (*Offset == 24 && *Width == 8) |
| 667 | SrcSel = BYTE_3; |
| 668 | else |
| 669 | break; |
| 670 | |
| 671 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 672 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 673 | |
| 674 | if (TRI->isPhysicalRegister(Src0->getReg()) || |
| 675 | TRI->isPhysicalRegister(Dst->getReg())) |
| 676 | break; |
| 677 | |
| 678 | return make_unique<SDWASrcOperand>( |
| 679 | Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32); |
| 680 | } |
| 681 | |
| 682 | case AMDGPU::V_AND_B32_e32: |
| 683 | case AMDGPU::V_AND_B32_e64: { |
| 684 | // e.g.: |
| 685 | // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0 |
| 686 | // to SDWA src:v0 src_sel:WORD_0/BYTE_0 |
| 687 | |
| 688 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 689 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 690 | auto ValSrc = Src1; |
| 691 | auto Imm = foldToImm(*Src0); |
| 692 | |
| 693 | if (!Imm) { |
| 694 | Imm = foldToImm(*Src1); |
| 695 | ValSrc = Src0; |
| 696 | } |
| 697 | |
| 698 | if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff)) |
| 699 | break; |
| 700 | |
| 701 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 702 | |
Nicolai Haehnle | cbebba4 | 2018-04-23 13:06:03 +0000 | [diff] [blame] | 703 | if (TRI->isPhysicalRegister(ValSrc->getReg()) || |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 704 | TRI->isPhysicalRegister(Dst->getReg())) |
| 705 | break; |
| 706 | |
| 707 | return make_unique<SDWASrcOperand>( |
| 708 | ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0); |
| 709 | } |
| 710 | |
| 711 | case AMDGPU::V_OR_B32_e32: |
| 712 | case AMDGPU::V_OR_B32_e64: { |
| 713 | // Patterns for dst_unused:UNUSED_PRESERVE. |
| 714 | // e.g., from: |
| 715 | // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD |
| 716 | // src1_sel:WORD_1 src2_sel:WORD1 |
| 717 | // v_add_f16_e32 v3, v1, v2 |
| 718 | // v_or_b32_e32 v4, v0, v3 |
| 719 | // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3 |
| 720 | |
| 721 | // Check if one of operands of v_or_b32 is SDWA instruction |
| 722 | using CheckRetType = Optional<std::pair<MachineOperand *, MachineOperand *>>; |
| 723 | auto CheckOROperandsForSDWA = |
| 724 | [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType { |
| 725 | if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg()) |
| 726 | return CheckRetType(None); |
| 727 | |
| 728 | MachineOperand *Op1Def = findSingleRegDef(Op1, MRI); |
| 729 | if (!Op1Def) |
| 730 | return CheckRetType(None); |
| 731 | |
| 732 | MachineInstr *Op1Inst = Op1Def->getParent(); |
| 733 | if (!TII->isSDWA(*Op1Inst)) |
| 734 | return CheckRetType(None); |
| 735 | |
| 736 | MachineOperand *Op2Def = findSingleRegDef(Op2, MRI); |
| 737 | if (!Op2Def) |
| 738 | return CheckRetType(None); |
| 739 | |
| 740 | return CheckRetType(std::make_pair(Op1Def, Op2Def)); |
| 741 | }; |
| 742 | |
| 743 | MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 744 | MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 745 | assert(OrSDWA && OrOther); |
| 746 | auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther); |
| 747 | if (!Res) { |
| 748 | OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 749 | OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 750 | assert(OrSDWA && OrOther); |
| 751 | Res = CheckOROperandsForSDWA(OrSDWA, OrOther); |
| 752 | if (!Res) |
| 753 | break; |
| 754 | } |
| 755 | |
| 756 | MachineOperand *OrSDWADef = Res->first; |
| 757 | MachineOperand *OrOtherDef = Res->second; |
| 758 | assert(OrSDWADef && OrOtherDef); |
| 759 | |
| 760 | MachineInstr *SDWAInst = OrSDWADef->getParent(); |
| 761 | MachineInstr *OtherInst = OrOtherDef->getParent(); |
| 762 | |
| 763 | // Check that OtherInstr is actually bitwise compatible with SDWAInst = their |
| 764 | // destination patterns don't overlap. Compatible instruction can be either |
| 765 | // regular instruction with compatible bitness or SDWA instruction with |
| 766 | // correct dst_sel |
| 767 | // SDWAInst | OtherInst bitness / OtherInst dst_sel |
| 768 | // ----------------------------------------------------- |
| 769 | // DWORD | no / no |
| 770 | // WORD_0 | no / BYTE_2/3, WORD_1 |
| 771 | // WORD_1 | 8/16-bit instructions / BYTE_0/1, WORD_0 |
| 772 | // BYTE_0 | no / BYTE_1/2/3, WORD_1 |
| 773 | // BYTE_1 | 8-bit / BYTE_0/2/3, WORD_1 |
| 774 | // BYTE_2 | 8/16-bit / BYTE_0/1/3. WORD_0 |
| 775 | // BYTE_3 | 8/16/24-bit / BYTE_0/1/2, WORD_0 |
| 776 | // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK |
| 777 | // but v_add_f32 is not. |
| 778 | |
| 779 | // TODO: add support for non-SDWA instructions as OtherInst. |
| 780 | // For now this only works with SDWA instructions. For regular instructions |
Michael Bedy | 80cf9ff | 2018-03-11 03:27:50 +0000 | [diff] [blame] | 781 | // there is no way to determine if the instruction writes only 8/16/24-bit |
| 782 | // out of full register size and all registers are at min 32-bit wide. |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 783 | if (!TII->isSDWA(*OtherInst)) |
| 784 | break; |
| 785 | |
| 786 | SdwaSel DstSel = static_cast<SdwaSel>( |
| 787 | TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));; |
| 788 | SdwaSel OtherDstSel = static_cast<SdwaSel>( |
| 789 | TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel)); |
| 790 | |
| 791 | bool DstSelAgree = false; |
| 792 | switch (DstSel) { |
| 793 | case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) || |
| 794 | (OtherDstSel == BYTE_3) || |
| 795 | (OtherDstSel == WORD_1)); |
| 796 | break; |
| 797 | case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 798 | (OtherDstSel == BYTE_1) || |
| 799 | (OtherDstSel == WORD_0)); |
| 800 | break; |
| 801 | case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) || |
| 802 | (OtherDstSel == BYTE_2) || |
| 803 | (OtherDstSel == BYTE_3) || |
| 804 | (OtherDstSel == WORD_1)); |
| 805 | break; |
| 806 | case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 807 | (OtherDstSel == BYTE_2) || |
| 808 | (OtherDstSel == BYTE_3) || |
| 809 | (OtherDstSel == WORD_1)); |
| 810 | break; |
| 811 | case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 812 | (OtherDstSel == BYTE_1) || |
| 813 | (OtherDstSel == BYTE_3) || |
| 814 | (OtherDstSel == WORD_0)); |
| 815 | break; |
| 816 | case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) || |
| 817 | (OtherDstSel == BYTE_1) || |
| 818 | (OtherDstSel == BYTE_2) || |
| 819 | (OtherDstSel == WORD_0)); |
| 820 | break; |
| 821 | default: DstSelAgree = false; |
| 822 | } |
| 823 | |
| 824 | if (!DstSelAgree) |
| 825 | break; |
| 826 | |
| 827 | // Also OtherInst dst_unused should be UNUSED_PAD |
| 828 | DstUnused OtherDstUnused = static_cast<DstUnused>( |
| 829 | TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused)); |
| 830 | if (OtherDstUnused != DstUnused::UNUSED_PAD) |
| 831 | break; |
| 832 | |
| 833 | // Create DstPreserveOperand |
| 834 | MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
| 835 | assert(OrDst && OrDst->isReg()); |
| 836 | |
| 837 | return make_unique<SDWADstPreserveOperand>( |
| 838 | OrDst, OrSDWADef, OrOtherDef, DstSel); |
| 839 | |
| 840 | } |
| 841 | } |
| 842 | |
| 843 | return std::unique_ptr<SDWAOperand>(nullptr); |
| 844 | } |
| 845 | |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 846 | void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) { |
| 847 | for (MachineInstr &MI : MBB) { |
| 848 | if (auto Operand = matchSDWAOperand(MI)) { |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 849 | LLVM_DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n'); |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 850 | SDWAOperands[&MI] = std::move(Operand); |
| 851 | ++NumSDWAPatternsFound; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 852 | } |
| 853 | } |
| 854 | } |
| 855 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 856 | bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI, |
| 857 | const SISubtarget &ST) const { |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 858 | // Check if this is already an SDWA instruction |
| 859 | unsigned Opc = MI.getOpcode(); |
| 860 | if (TII->isSDWA(Opc)) |
| 861 | return true; |
| 862 | |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 863 | // Check if this instruction has opcode that supports SDWA |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 864 | if (AMDGPU::getSDWAOp(Opc) == -1) |
| 865 | Opc = AMDGPU::getVOPe32(Opc); |
| 866 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 867 | if (AMDGPU::getSDWAOp(Opc) == -1) |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 868 | return false; |
| 869 | |
| 870 | if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) |
| 871 | return false; |
| 872 | |
| 873 | if (TII->isVOPC(Opc)) { |
| 874 | if (!ST.hasSDWASdst()) { |
| 875 | const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst); |
| 876 | if (SDst && SDst->getReg() != AMDGPU::VCC) |
| 877 | return false; |
| 878 | } |
| 879 | |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 880 | if (!ST.hasSDWAOutModsVOPC() && |
| 881 | (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) || |
| 882 | TII->hasModifiersSet(MI, AMDGPU::OpName::omod))) |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 883 | return false; |
| 884 | |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 885 | } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) || |
| 886 | !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 887 | return false; |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 888 | } |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 889 | |
| 890 | if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 || |
| 891 | Opc == AMDGPU::V_MAC_F32_e32)) |
| 892 | return false; |
| 893 | |
Dmitry Preobrazhensky | 4c45e6f | 2018-04-16 12:41:38 +0000 | [diff] [blame] | 894 | // FIXME: has SDWA but require handling of implicit VCC use |
| 895 | if (Opc == AMDGPU::V_CNDMASK_B32_e32) |
| 896 | return false; |
| 897 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 898 | return true; |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 899 | } |
| 900 | |
| 901 | bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, |
| 902 | const SDWAOperandsVector &SDWAOperands) { |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 903 | |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 904 | LLVM_DEBUG(dbgs() << "Convert instruction:" << MI); |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 905 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 906 | // Convert to sdwa |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 907 | int SDWAOpcode; |
| 908 | unsigned Opcode = MI.getOpcode(); |
| 909 | if (TII->isSDWA(Opcode)) { |
| 910 | SDWAOpcode = Opcode; |
| 911 | } else { |
| 912 | SDWAOpcode = AMDGPU::getSDWAOp(Opcode); |
| 913 | if (SDWAOpcode == -1) |
| 914 | SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode)); |
| 915 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 916 | assert(SDWAOpcode != -1); |
| 917 | |
| 918 | const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode); |
| 919 | |
| 920 | // Create SDWA version of instruction MI and initialize its operands |
| 921 | MachineInstrBuilder SDWAInst = |
| 922 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc); |
| 923 | |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 924 | // Copy dst, if it is present in original then should also be present in SDWA |
| 925 | MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 926 | if (Dst) { |
| 927 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); |
| 928 | SDWAInst.add(*Dst); |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 929 | } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 930 | assert(Dst && |
| 931 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); |
| 932 | SDWAInst.add(*Dst); |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 933 | } else { |
| 934 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); |
| 935 | SDWAInst.addReg(AMDGPU::VCC, RegState::Define); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 936 | } |
| 937 | |
| 938 | // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and |
| 939 | // src0_modifiers (except for v_nop_sdwa, but it can't get here) |
| 940 | MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); |
| 941 | assert( |
| 942 | Src0 && |
| 943 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && |
| 944 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 945 | if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) |
| 946 | SDWAInst.addImm(Mod->getImm()); |
| 947 | else |
| 948 | SDWAInst.addImm(0); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 949 | SDWAInst.add(*Src0); |
| 950 | |
| 951 | // Copy src1 if present, initialize src1_modifiers. |
| 952 | MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); |
| 953 | if (Src1) { |
| 954 | assert( |
| 955 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && |
| 956 | AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); |
Stanislav Mekhanoshin | 0330660 | 2017-06-03 17:39:47 +0000 | [diff] [blame] | 957 | if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) |
| 958 | SDWAInst.addImm(Mod->getImm()); |
| 959 | else |
| 960 | SDWAInst.addImm(0); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 961 | SDWAInst.add(*Src1); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 962 | } |
| 963 | |
| 964 | if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa || |
| 965 | SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) { |
| 966 | // v_mac_f16/32 has additional src2 operand tied to vdst |
| 967 | MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); |
| 968 | assert(Src2); |
| 969 | SDWAInst.add(*Src2); |
| 970 | } |
| 971 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 972 | // Copy clamp if present, initialize otherwise |
| 973 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); |
| 974 | MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp); |
| 975 | if (Clamp) { |
| 976 | SDWAInst.add(*Clamp); |
| 977 | } else { |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 978 | SDWAInst.addImm(0); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 979 | } |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 980 | |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 981 | // Copy omod if present, initialize otherwise if needed |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 982 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { |
| 983 | MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); |
| 984 | if (OMod) { |
| 985 | SDWAInst.add(*OMod); |
| 986 | } else { |
| 987 | SDWAInst.addImm(0); |
| 988 | } |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 989 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 990 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 991 | // Copy dst_sel if present, initialize otherwise if needed |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 992 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) { |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 993 | MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); |
| 994 | if (DstSel) { |
| 995 | SDWAInst.add(*DstSel); |
| 996 | } else { |
| 997 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); |
| 998 | } |
| 999 | } |
| 1000 | |
| 1001 | // Copy dst_unused if present, initialize otherwise if needed |
| 1002 | if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { |
| 1003 | MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
| 1004 | if (DstUnused) { |
| 1005 | SDWAInst.add(*DstUnused); |
| 1006 | } else { |
| 1007 | SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD); |
| 1008 | } |
| 1009 | } |
| 1010 | |
| 1011 | // Copy src0_sel if present, initialize otherwise |
| 1012 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); |
| 1013 | MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); |
| 1014 | if (Src0Sel) { |
| 1015 | SDWAInst.add(*Src0Sel); |
| 1016 | } else { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1017 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); |
Sam Kolton | a179d25 | 2017-06-27 15:02:23 +0000 | [diff] [blame] | 1018 | } |
| 1019 | |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1020 | // Copy src1_sel if present, initialize otherwise if needed |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1021 | if (Src1) { |
| 1022 | assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1023 | MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); |
| 1024 | if (Src1Sel) { |
| 1025 | SDWAInst.add(*Src1Sel); |
| 1026 | } else { |
| 1027 | SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD); |
| 1028 | } |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1029 | } |
| 1030 | |
Michael Bedy | 59e5ef7 | 2018-03-30 05:03:36 +0000 | [diff] [blame] | 1031 | // Check for a preserved register that needs to be copied. |
| 1032 | auto DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); |
| 1033 | if (DstUnused && |
| 1034 | DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) { |
| 1035 | // We expect, if we are here, that the instruction was already in it's SDWA form, |
| 1036 | // with a tied operand. |
| 1037 | assert(Dst && Dst->isTied()); |
| 1038 | assert(Opcode == static_cast<unsigned int>(SDWAOpcode)); |
| 1039 | // We also expect a vdst, since sdst can't preserve. |
| 1040 | auto PreserveDstIdx = AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst); |
| 1041 | assert(PreserveDstIdx != -1); |
| 1042 | |
| 1043 | auto TiedIdx = MI.findTiedOperandIdx(PreserveDstIdx); |
| 1044 | auto Tied = MI.getOperand(TiedIdx); |
| 1045 | |
| 1046 | SDWAInst.add(Tied); |
| 1047 | SDWAInst->tieOperands(PreserveDstIdx, SDWAInst->getNumOperands() - 1); |
| 1048 | } |
| 1049 | |
Matt Arsenault | c24d5e2 | 2018-02-08 22:46:38 +0000 | [diff] [blame] | 1050 | // Apply all sdwa operand patterns. |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1051 | bool Converted = false; |
| 1052 | for (auto &Operand : SDWAOperands) { |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1053 | LLVM_DEBUG(dbgs() << *SDWAInst << "\nOperand: " << *Operand); |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 1054 | // There should be no intesection between SDWA operands and potential MIs |
| 1055 | // e.g.: |
| 1056 | // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0 |
| 1057 | // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0 |
| 1058 | // v_add_u32 v3, v4, v2 |
| 1059 | // |
| 1060 | // In that example it is possible that we would fold 2nd instruction into 3rd |
| 1061 | // (v_add_u32_sdwa) and then try to fold 1st instruction into 2nd (that was |
| 1062 | // already destroyed). So if SDWAOperand is also a potential MI then do not |
| 1063 | // apply it. |
| 1064 | if (PotentialMatches.count(Operand->getParentInst()) == 0) |
| 1065 | Converted |= Operand->convertToSDWA(*SDWAInst, TII); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1066 | } |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1067 | if (Converted) { |
| 1068 | ConvertedInstructions.push_back(SDWAInst); |
| 1069 | } else { |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1070 | SDWAInst->eraseFromParent(); |
| 1071 | return false; |
| 1072 | } |
| 1073 | |
Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1074 | LLVM_DEBUG(dbgs() << "\nInto:" << *SDWAInst << '\n'); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1075 | ++NumSDWAInstructionsPeepholed; |
| 1076 | |
| 1077 | MI.eraseFromParent(); |
| 1078 | return true; |
| 1079 | } |
| 1080 | |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1081 | // If an instruction was converted to SDWA it should not have immediates or SGPR |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1082 | // operands (allowed one SGPR on GFX9). Copy its scalar operands into VGPRs. |
Matt Arsenault | c24d5e2 | 2018-02-08 22:46:38 +0000 | [diff] [blame] | 1083 | void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, |
| 1084 | const SISubtarget &ST) const { |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1085 | const MCInstrDesc &Desc = TII->get(MI.getOpcode()); |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1086 | unsigned ConstantBusCount = 0; |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1087 | for (MachineOperand &Op : MI.explicit_uses()) { |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1088 | if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg()))) |
| 1089 | continue; |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1090 | |
| 1091 | unsigned I = MI.getOperandNo(&Op); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1092 | if (Desc.OpInfo[I].RegClass == -1 || |
| 1093 | !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass))) |
| 1094 | continue; |
Sam Kolton | 3c4933f | 2017-06-22 06:26:41 +0000 | [diff] [blame] | 1095 | |
| 1096 | if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() && |
| 1097 | TRI->isSGPRReg(*MRI, Op.getReg())) { |
| 1098 | ++ConstantBusCount; |
| 1099 | continue; |
| 1100 | } |
| 1101 | |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1102 | unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
| 1103 | auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(), |
| 1104 | TII->get(AMDGPU::V_MOV_B32_e32), VGPR); |
| 1105 | if (Op.isImm()) |
| 1106 | Copy.addImm(Op.getImm()); |
| 1107 | else if (Op.isReg()) |
| 1108 | Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0, |
| 1109 | Op.getSubReg()); |
| 1110 | Op.ChangeToRegister(VGPR, false); |
| 1111 | } |
| 1112 | } |
| 1113 | |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1114 | bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { |
| 1115 | const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); |
| 1116 | |
Matthias Braun | f1caa28 | 2017-12-15 22:22:58 +0000 | [diff] [blame] | 1117 | if (!ST.hasSDWA() || skipFunction(MF.getFunction())) |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1118 | return false; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1119 | |
| 1120 | MRI = &MF.getRegInfo(); |
| 1121 | TRI = ST.getRegisterInfo(); |
| 1122 | TII = ST.getInstrInfo(); |
Sam Kolton | 549c89d | 2017-06-21 08:53:38 +0000 | [diff] [blame] | 1123 | |
Sam Kolton | ebfdaf7 | 2017-05-18 12:12:03 +0000 | [diff] [blame] | 1124 | // Find all SDWA operands in MF. |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1125 | bool Ret = false; |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 1126 | for (MachineBasicBlock &MBB : MF) { |
| 1127 | bool Changed = false; |
| 1128 | do { |
| 1129 | matchSDWAOperands(MBB); |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1130 | |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 1131 | for (const auto &OperandPair : SDWAOperands) { |
| 1132 | const auto &Operand = OperandPair.second; |
| 1133 | MachineInstr *PotentialMI = Operand->potentialToConvert(TII); |
| 1134 | if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { |
| 1135 | PotentialMatches[PotentialMI].push_back(Operand.get()); |
| 1136 | } |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1137 | } |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 1138 | |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 1139 | for (auto &PotentialPair : PotentialMatches) { |
| 1140 | MachineInstr &PotentialMI = *PotentialPair.first; |
| 1141 | convertToSDWA(PotentialMI, PotentialPair.second); |
| 1142 | } |
Sam Kolton | aff8341 | 2017-04-12 09:36:05 +0000 | [diff] [blame] | 1143 | |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 1144 | PotentialMatches.clear(); |
| 1145 | SDWAOperands.clear(); |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1146 | |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 1147 | Changed = !ConvertedInstructions.empty(); |
Sam Kolton | 5f7f32c | 2017-12-04 16:22:32 +0000 | [diff] [blame] | 1148 | |
Matt Arsenault | 9c2f3c4 | 2018-02-08 22:46:41 +0000 | [diff] [blame] | 1149 | if (Changed) |
| 1150 | Ret = true; |
| 1151 | while (!ConvertedInstructions.empty()) |
| 1152 | legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST); |
| 1153 | } while (Changed); |
| 1154 | } |
Stanislav Mekhanoshin | 56ea488 | 2017-05-30 16:49:24 +0000 | [diff] [blame] | 1155 | |
Stanislav Mekhanoshin | e4cda74 | 2017-06-06 16:42:30 +0000 | [diff] [blame] | 1156 | return Ret; |
Sam Kolton | f60ad58 | 2017-03-21 12:51:34 +0000 | [diff] [blame] | 1157 | } |