//===- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This pass tries to apply several peephole SDWA patterns.
///
/// E.g. original:
///   V_LSHRREV_B32_e32 %0, 16, %1
///   V_ADD_I32_e32 %2, %0, %3
///   V_LSHLREV_B32_e32 %4, 16, %2
///
/// Replace:
///   V_ADD_I32_sdwa %4, %1, %3
///       dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <unordered_map>

using namespace llvm;

#define DEBUG_TYPE "si-peephole-sdwa"

STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
STATISTIC(NumSDWAInstructionsPeepholed,
          "Number of instructions converted to SDWA.");

namespace {

class SDWAOperand;
class SDWADstOperand;

class SIPeepholeSDWA : public MachineFunctionPass {
public:
  using SDWAOperandsVector = SmallVector<SDWAOperand *, 4>;

private:
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const SIInstrInfo *TII;

  std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
  std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
  SmallVector<MachineInstr *, 8> ConvertedInstructions;

  Optional<int64_t> foldToImm(const MachineOperand &Op) const;

public:
  static char ID;

  SIPeepholeSDWA() : MachineFunctionPass(ID) {
    initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void matchSDWAOperands(MachineBasicBlock &MBB);
  std::unique_ptr<SDWAOperand> matchSDWAOperand(MachineInstr &MI);
  bool isConvertibleToSDWA(const MachineInstr &MI, const SISubtarget &ST) const;
  bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
  void legalizeScalarOperands(MachineInstr &MI, const SISubtarget &ST) const;

  StringRef getPassName() const override { return "SI Peephole SDWA"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

class SDWAOperand {
private:
  MachineOperand *Target;   // Operand that would be used in the converted instruction
  MachineOperand *Replaced; // Operand that would be replaced by Target

public:
  SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
      : Target(TargetOp), Replaced(ReplacedOp) {
    assert(Target->isReg());
    assert(Replaced->isReg());
  }

  virtual ~SDWAOperand() = default;

  virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
  virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;

  MachineOperand *getTargetOperand() const { return Target; }
  MachineOperand *getReplacedOperand() const { return Replaced; }
  MachineInstr *getParentInst() const { return Target->getParent(); }

  MachineRegisterInfo *getMRI() const {
    return &getParentInst()->getParent()->getParent()->getRegInfo();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  virtual void print(raw_ostream& OS) const = 0;
  void dump() const { print(dbgs()); }
#endif
};

using namespace AMDGPU::SDWA;

class SDWASrcOperand : public SDWAOperand {
private:
  SdwaSel SrcSel;
  bool Abs;
  bool Neg;
  bool Sext;

public:
  SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
                 bool Sext_ = false)
      : SDWAOperand(TargetOp, ReplacedOp),
        SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getSrcSel() const { return SrcSel; }
  bool getAbs() const { return Abs; }
  bool getNeg() const { return Neg; }
  bool getSext() const { return Sext; }

  uint64_t getSrcMods(const SIInstrInfo *TII,
                      const MachineOperand *SrcOp) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream& OS) const override;
#endif
};

class SDWADstOperand : public SDWAOperand {
private:
  SdwaSel DstSel;
  DstUnused DstUn;

public:
  SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                 SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
      : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}

  MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  SdwaSel getDstSel() const { return DstSel; }
  DstUnused getDstUnused() const { return DstUn; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream& OS) const override;
#endif
};

class SDWADstPreserveOperand : public SDWADstOperand {
private:
  MachineOperand *Preserve;

public:
  SDWADstPreserveOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
                         MachineOperand *PreserveOp, SdwaSel DstSel_ = DWORD)
      : SDWADstOperand(TargetOp, ReplacedOp, DstSel_, UNUSED_PRESERVE),
        Preserve(PreserveOp) {}

  bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;

  MachineOperand *getPreservedOperand() const { return Preserve; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream& OS) const override;
#endif
};

} // end anonymous namespace

INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)

char SIPeepholeSDWA::ID = 0;

char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;

FunctionPass *llvm::createSIPeepholeSDWAPass() {
  return new SIPeepholeSDWA();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static raw_ostream& operator<<(raw_ostream &OS, SdwaSel Sel) {
  switch(Sel) {
  case BYTE_0: OS << "BYTE_0"; break;
  case BYTE_1: OS << "BYTE_1"; break;
  case BYTE_2: OS << "BYTE_2"; break;
  case BYTE_3: OS << "BYTE_3"; break;
  case WORD_0: OS << "WORD_0"; break;
  case WORD_1: OS << "WORD_1"; break;
  case DWORD:  OS << "DWORD"; break;
  }
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
  switch(Un) {
  case UNUSED_PAD: OS << "UNUSED_PAD"; break;
  case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
  case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
  }
  return OS;
}

static raw_ostream& operator<<(raw_ostream &OS, const SDWAOperand &Operand) {
  Operand.print(OS);
  return OS;
}

LLVM_DUMP_METHOD
void SDWASrcOperand::print(raw_ostream& OS) const {
  OS << "SDWA src: " << *getTargetOperand()
     << " src_sel:" << getSrcSel()
     << " abs:" << getAbs() << " neg:" << getNeg()
     << " sext:" << getSext() << '\n';
}

LLVM_DUMP_METHOD
void SDWADstOperand::print(raw_ostream& OS) const {
  OS << "SDWA dst: " << *getTargetOperand()
     << " dst_sel:" << getDstSel()
     << " dst_unused:" << getDstUnused() << '\n';
}

LLVM_DUMP_METHOD
void SDWADstPreserveOperand::print(raw_ostream& OS) const {
  OS << "SDWA preserve dst: " << *getTargetOperand()
     << " dst_sel:" << getDstSel()
     << " preserve:" << *getPreservedOperand() << '\n';
}

#endif

static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
  assert(To.isReg() && From.isReg());
  To.setReg(From.getReg());
  To.setSubReg(From.getSubReg());
  To.setIsUndef(From.isUndef());
  if (To.isUse()) {
    To.setIsKill(From.isKill());
  } else {
    To.setIsDead(From.isDead());
  }
}

static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
  return LHS.isReg() &&
         RHS.isReg() &&
         LHS.getReg() == RHS.getReg() &&
         LHS.getSubReg() == RHS.getSubReg();
}

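// Return a use of the register defined by Reg, provided that all uses of that
// register are located in a single instruction and none of them goes through a
// subregister; otherwise return nullptr.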
static MachineOperand *findSingleRegUse(const MachineOperand *Reg,
                                        const MachineRegisterInfo *MRI) {
  if (!Reg->isReg() || !Reg->isDef())
    return nullptr;

  MachineOperand *ResMO = nullptr;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(Reg->getReg())) {
    // If there exists a use of a subreg of Reg then return nullptr
    if (!isSameReg(UseMO, *Reg))
      return nullptr;

    // Check that there is only one instruction that uses Reg
    if (!ResMO) {
      ResMO = &UseMO;
    } else if (ResMO->getParent() != UseMO.getParent()) {
      return nullptr;
    }
  }

  return ResMO;
}

static MachineOperand *findSingleRegDef(const MachineOperand *Reg,
                                        const MachineRegisterInfo *MRI) {
  if (!Reg->isReg())
    return nullptr;

  MachineInstr *DefInstr = MRI->getUniqueVRegDef(Reg->getReg());
  if (!DefInstr)
    return nullptr;

  for (auto &DefMO : DefInstr->defs()) {
    if (DefMO.isReg() && DefMO.getReg() == Reg->getReg())
      return &DefMO;
  }

  // Ignore implicit defs.
  return nullptr;
}

uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII,
                                    const MachineOperand *SrcOp) const {
  uint64_t Mods = 0;
  const auto *MI = SrcOp->getParent();
  if (TII->getNamedOperand(*MI, AMDGPU::OpName::src0) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
      Mods = Mod->getImm();
    }
  } else if (TII->getNamedOperand(*MI, AMDGPU::OpName::src1) == SrcOp) {
    if (auto *Mod = TII->getNamedOperand(*MI, AMDGPU::OpName::src1_modifiers)) {
      Mods = Mod->getImm();
    }
  }
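  // Merge this operand's modifiers on top of the ones already present on the
  // instruction: ABS is simply set, while NEG is toggled (XOR) so that a
  // negation already encoded on the source cancels out instead of being
  // applied twice. Float (abs/neg) and integer (sext) modifiers are mutually
  // exclusive, as the assert below checks.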
  if (Abs || Neg) {
    assert(!Sext &&
           "Float and integer src modifiers can't be set simultaneously");
    Mods |= Abs ? SISrcMods::ABS : 0;
    Mods ^= Neg ? SISrcMods::NEG : 0;
  } else if (Sext) {
    Mods |= SISrcMods::SEXT;
  }

  return Mods;
}

MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA src operand the potential instruction is the one that uses
  // the register defined by the parent instruction
  MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI());
  if (!PotentialMO)
    return nullptr;

  return PotentialMO->getParent();
}

bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Find the operand in the instruction that matches the replaced operand and
  // replace it with the target operand. Set the corresponding src_sel.

  MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  MachineOperand *SrcMods =
      TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
  assert(Src && (Src->isReg() || Src->isImm()));
  if (!isSameReg(*Src, *getReplacedOperand())) {
    // If this is not src0 then it should be src1
    Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);

    assert(Src && Src->isReg());

    if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
         MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
        !isSameReg(*Src, *getReplacedOperand())) {
      // In case of v_mac_f16/32_sdwa this pass can try to apply the src
      // operand to src2. This is not allowed.
      return false;
    }

    assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
  }
  copyRegOperand(*Src, *getTargetOperand());
  SrcSel->setImm(getSrcSel());
  SrcMods->setImm(getSrcMods(TII, Src));
  getTargetOperand()->setIsKill(false);
  return true;
}

MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
  // For an SDWA dst operand the potential instruction is the one that defines
  // the register that this operand uses
  MachineRegisterInfo *MRI = getMRI();
  MachineInstr *ParentMI = getParentInst();

  MachineOperand *PotentialMO = findSingleRegDef(getReplacedOperand(), MRI);
  if (!PotentialMO)
    return nullptr;

  // Check that ParentMI is the only instruction that uses the replaced register
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(PotentialMO->getReg())) {
    if (&UseInst != ParentMI)
      return nullptr;
  }

  return PotentialMO->getParent();
}

bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
  // Replace the vdst operand in MI with the target operand. Set dst_sel and
  // dst_unused.

  if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
       MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
      getDstSel() != AMDGPU::SDWA::DWORD) {
    // v_mac_f16/32_sdwa allows dst_sel to be equal only to DWORD
    return false;
  }

  MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  assert(Operand &&
         Operand->isReg() &&
         isSameReg(*Operand, *getReplacedOperand()));
  copyRegOperand(*Operand, *getTargetOperand());
  MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
  assert(DstSel);
  DstSel->setImm(getDstSel());
  MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
  assert(DstUnused);
  DstUnused->setImm(getDstUnused());

  // Remove the original instruction because it would conflict with our new
  // instruction by register definition
  getParentInst()->eraseFromParent();
  return true;
}

bool SDWADstPreserveOperand::convertToSDWA(MachineInstr &MI,
                                           const SIInstrInfo *TII) {
  // MI should be moved right before v_or_b32.
  // For this we should clear all kill flags on uses of MI src-operands or else
  // we can encounter a problem with a use of a killed operand.
  for (MachineOperand &MO : MI.uses()) {
    if (!MO.isReg())
      continue;
    getMRI()->clearKillFlags(MO.getReg());
  }

  // Move MI before v_or_b32
  auto MBB = MI.getParent();
  MBB->remove(&MI);
  MBB->insert(getParentInst(), &MI);

  // Add an implicit use of the preserved register
  MachineInstrBuilder MIB(*MBB->getParent(), MI);
  MIB.addReg(getPreservedOperand()->getReg(),
             RegState::ImplicitKill,
             getPreservedOperand()->getSubReg());

  // Tie dst to the implicit use
  MI.tieOperands(AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst),
                 MI.getNumOperands() - 1);

  // Convert MI as any other SDWADstOperand and remove v_or_b32
  return SDWADstOperand::convertToSDWA(MI, TII);
}

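// Try to reduce Op to a compile-time constant: either the immediate itself or,
// if Op is a register, the immediate copied into it by a single foldable copy
// (e.g. S_MOV_B32). Returns None if no constant can be derived.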
Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
  if (Op.isImm()) {
    return Op.getImm();
  }

  // If this is not an immediate then it can be a copy of an immediate value,
  // e.g.:
  // %1 = S_MOV_B32 255;
  if (Op.isReg()) {
    for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
      if (!isSameReg(Op, Def))
        continue;

      const MachineInstr *DefInst = Def.getParent();
      if (!TII->isFoldableCopy(*DefInst))
        return None;

      const MachineOperand &Copied = DefInst->getOperand(1);
      if (!Copied.isImm())
        return None;

      return Copied.getImm();
    }
  }

  return None;
}

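// Try to recognize an instruction that can be folded into an SDWA operand:
// 16/24-bit shifts of 32-bit values, 8-bit shifts of 16-bit values, byte/word
// BFE, masking with 0xff/0xffff, and v_or_b32 combining SDWA results (the
// dst_unused:UNUSED_PRESERVE pattern). Returns the matched operand or nullptr.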
std::unique_ptr<SDWAOperand>
SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_LSHLREV_B32_e64: {
    // from: v_lshrrev_b32_e32 v1, 16/24, v0
    // to SDWA src:v0 src_sel:WORD_1/BYTE_3

    // from: v_ashrrev_i32_e32 v1, 16/24, v0
    // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1

    // from: v_lshlrev_b32_e32 v1, 16/24, v0
    // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    auto Imm = foldToImm(*Src0);
    if (!Imm)
      break;

    if (*Imm != 16 && *Imm != 24)
      break;

    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    if (TRI->isPhysicalRegister(Src1->getReg()) ||
        TRI->isPhysicalRegister(Dst->getReg()))
      break;

    if (Opcode == AMDGPU::V_LSHLREV_B32_e32 ||
        Opcode == AMDGPU::V_LSHLREV_B32_e64) {
      return make_unique<SDWADstOperand>(
          Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
    } else {
      return make_unique<SDWASrcOperand>(
          Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
          Opcode != AMDGPU::V_LSHRREV_B32_e32 &&
          Opcode != AMDGPU::V_LSHRREV_B32_e64);
    }
    break;
  }

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_LSHLREV_B16_e64: {
    // from: v_lshrrev_b16_e32 v1, 8, v0
    // to SDWA src:v0 src_sel:BYTE_1

    // from: v_ashrrev_i16_e32 v1, 8, v0
    // to SDWA src:v0 src_sel:BYTE_1 sext:1

    // from: v_lshlrev_b16_e32 v1, 8, v0
    // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    auto Imm = foldToImm(*Src0);
    if (!Imm || *Imm != 8)
      break;

    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

    if (TRI->isPhysicalRegister(Src1->getReg()) ||
        TRI->isPhysicalRegister(Dst->getReg()))
      break;

    if (Opcode == AMDGPU::V_LSHLREV_B16_e32 ||
        Opcode == AMDGPU::V_LSHLREV_B16_e64) {
      return make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
    } else {
      return make_unique<SDWASrcOperand>(
          Src1, Dst, BYTE_1, false, false,
          Opcode != AMDGPU::V_LSHRREV_B16_e32 &&
          Opcode != AMDGPU::V_LSHRREV_B16_e64);
    }
    break;
  }

  case AMDGPU::V_BFE_I32:
  case AMDGPU::V_BFE_U32: {
    // e.g.:
    // from: v_bfe_u32 v1, v0, 8, 8
    // to SDWA src:v0 src_sel:BYTE_1

    // offset | width | src_sel
    // ------------------------
    // 0      | 8     | BYTE_0
    // 0      | 16    | WORD_0
    // 0      | 32    | DWORD ?
    // 8      | 8     | BYTE_1
    // 16     | 8     | BYTE_2
    // 16     | 16    | WORD_1
    // 24     | 8     | BYTE_3

    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    auto Offset = foldToImm(*Src1);
    if (!Offset)
      break;

    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    auto Width = foldToImm(*Src2);
    if (!Width)
      break;

    SdwaSel SrcSel = DWORD;

    if (*Offset == 0 && *Width == 8)
      SrcSel = BYTE_0;
    else if (*Offset == 0 && *Width == 16)
      SrcSel = WORD_0;
    else if (*Offset == 0 && *Width == 32)
      SrcSel = DWORD;
    else if (*Offset == 8 && *Width == 8)
      SrcSel = BYTE_1;
    else if (*Offset == 16 && *Width == 8)
      SrcSel = BYTE_2;
    else if (*Offset == 16 && *Width == 16)
      SrcSel = WORD_1;
    else if (*Offset == 24 && *Width == 8)
      SrcSel = BYTE_3;
    else
      break;

    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

    if (TRI->isPhysicalRegister(Src0->getReg()) ||
        TRI->isPhysicalRegister(Dst->getReg()))
      break;

    return make_unique<SDWASrcOperand>(
        Src0, Dst, SrcSel, false, false, Opcode != AMDGPU::V_BFE_U32);
  }

  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::V_AND_B32_e64: {
    // e.g.:
    // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
    // to SDWA src:v0 src_sel:WORD_0/BYTE_0

    MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    auto ValSrc = Src1;
    auto Imm = foldToImm(*Src0);

    if (!Imm) {
      Imm = foldToImm(*Src1);
      ValSrc = Src0;
    }

    if (!Imm || (*Imm != 0x0000ffff && *Imm != 0x000000ff))
      break;

    MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);

    if (TRI->isPhysicalRegister(Src1->getReg()) ||
        TRI->isPhysicalRegister(Dst->getReg()))
      break;

    return make_unique<SDWASrcOperand>(
        ValSrc, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
  }

  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::V_OR_B32_e64: {
    // Patterns for dst_unused:UNUSED_PRESERVE.
    // e.g., from:
    // v_add_f16_sdwa v0, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD
    //                           src1_sel:WORD_1 src2_sel:WORD_1
    // v_add_f16_e32 v3, v1, v2
    // v_or_b32_e32 v4, v0, v3
    // to SDWA preserve dst:v4 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE preserve:v3

    // Check if one of the operands of v_or_b32 is an SDWA instruction
    using CheckRetType = Optional<std::pair<MachineOperand *, MachineOperand *>>;
    auto CheckOROperandsForSDWA =
      [&](const MachineOperand *Op1, const MachineOperand *Op2) -> CheckRetType {
        if (!Op1 || !Op1->isReg() || !Op2 || !Op2->isReg())
          return CheckRetType(None);

        MachineOperand *Op1Def = findSingleRegDef(Op1, MRI);
        if (!Op1Def)
          return CheckRetType(None);

        MachineInstr *Op1Inst = Op1Def->getParent();
        if (!TII->isSDWA(*Op1Inst))
          return CheckRetType(None);

        MachineOperand *Op2Def = findSingleRegDef(Op2, MRI);
        if (!Op2Def)
          return CheckRetType(None);

        return CheckRetType(std::make_pair(Op1Def, Op2Def));
      };

    MachineOperand *OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    MachineOperand *OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    assert(OrSDWA && OrOther);
    auto Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
    if (!Res) {
      OrSDWA = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
      OrOther = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
      assert(OrSDWA && OrOther);
      Res = CheckOROperandsForSDWA(OrSDWA, OrOther);
      if (!Res)
        break;
    }

    MachineOperand *OrSDWADef = Res->first;
    MachineOperand *OrOtherDef = Res->second;
    assert(OrSDWADef && OrOtherDef);

    MachineInstr *SDWAInst = OrSDWADef->getParent();
    MachineInstr *OtherInst = OrOtherDef->getParent();

    // Check that OtherInst is actually bitwise compatible with SDWAInst, i.e.
    // their destination bit patterns don't overlap. A compatible instruction
    // can be either a regular instruction with compatible bitness or an SDWA
    // instruction with a correct dst_sel.
    // SDWAInst | OtherInst bitness / OtherInst dst_sel
    // -----------------------------------------------------
    // DWORD    | no                    / no
    // WORD_0   | no                    / BYTE_2/3, WORD_1
    // WORD_1   | 8/16-bit instructions / BYTE_0/1, WORD_0
    // BYTE_0   | no                    / BYTE_1/2/3, WORD_1
    // BYTE_1   | 8-bit                 / BYTE_0/2/3, WORD_1
    // BYTE_2   | 8/16-bit              / BYTE_0/1/3, WORD_0
    // BYTE_3   | 8/16/24-bit           / BYTE_0/1/2, WORD_0
    // E.g. if SDWAInst is v_add_f16_sdwa dst_sel:WORD_1 then v_add_f16 is OK
    // but v_add_f32 is not.

    // TODO: add support for non-SDWA instructions as OtherInst.
    // For now this only works with SDWA instructions. For regular instructions
    // there is no way to determine if the instruction writes only 8/16/24 bits
    // of the full register size, and all registers are at least 32 bits wide.
    if (!TII->isSDWA(*OtherInst))
      break;

    SdwaSel DstSel = static_cast<SdwaSel>(
        TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));
    SdwaSel OtherDstSel = static_cast<SdwaSel>(
        TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));

    bool DstSelAgree = false;
    switch (DstSel) {
    case WORD_0: DstSelAgree = ((OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case WORD_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == WORD_0));
      break;
    case BYTE_0: DstSelAgree = ((OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case BYTE_1: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_1));
      break;
    case BYTE_2: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_3) ||
                                (OtherDstSel == WORD_0));
      break;
    case BYTE_3: DstSelAgree = ((OtherDstSel == BYTE_0) ||
                                (OtherDstSel == BYTE_1) ||
                                (OtherDstSel == BYTE_2) ||
                                (OtherDstSel == WORD_0));
      break;
    default: DstSelAgree = false;
    }

    if (!DstSelAgree)
      break;

    // Also OtherInst dst_unused should be UNUSED_PAD
    DstUnused OtherDstUnused = static_cast<DstUnused>(
        TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_unused));
    if (OtherDstUnused != DstUnused::UNUSED_PAD)
      break;

    // Create DstPreserveOperand
    MachineOperand *OrDst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
    assert(OrDst && OrDst->isReg());

    return make_unique<SDWADstPreserveOperand>(
        OrDst, OrSDWADef, OrOtherDef, DstSel);

  }
  }

  return std::unique_ptr<SDWAOperand>(nullptr);
}

void SIPeepholeSDWA::matchSDWAOperands(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : MBB) {
    if (auto Operand = matchSDWAOperand(MI)) {
      DEBUG(dbgs() << "Match: " << MI << "To: " << *Operand << '\n');
      SDWAOperands[&MI] = std::move(Operand);
      ++NumSDWAPatternsFound;
    }
  }
}

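// Check whether MI either already is an SDWA instruction or has an SDWA
// counterpart, and whether the subtarget restrictions (omod support, sdst
// rules for VOPC, and v_mac availability) allow the conversion.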
bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
                                         const SISubtarget &ST) const {
  // Check if this is already an SDWA instruction
  unsigned Opc = MI.getOpcode();
  if (TII->isSDWA(Opc))
    return true;

  // Check if this instruction has opcode that supports SDWA
  if (AMDGPU::getSDWAOp(Opc) == -1)
    Opc = AMDGPU::getVOPe32(Opc);

  if (AMDGPU::getSDWAOp(Opc) == -1)
    return false;

  if (!ST.hasSDWAOmod() && TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
    return false;

  if (TII->isVOPC(Opc)) {
    if (!ST.hasSDWASdst()) {
      const MachineOperand *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
      if (SDst && SDst->getReg() != AMDGPU::VCC)
        return false;
    }

    if (!ST.hasSDWAOutModsVOPC() &&
        (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
         TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
      return false;

  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
             !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
    return false;
  }

  if (!ST.hasSDWAMac() && (Opc == AMDGPU::V_MAC_F16_e32 ||
                           Opc == AMDGPU::V_MAC_F32_e32))
    return false;

  return true;
}

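// Rebuild MI as its SDWA counterpart: copy every operand of the original
// instruction into a freshly built _sdwa instruction (adding default clamp,
// omod and sel operands where the original had none), then apply all matched
// SDWA operands to it. The original instruction is erased on success.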
bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
                                   const SDWAOperandsVector &SDWAOperands) {
  // Convert to sdwa
  int SDWAOpcode;
  unsigned Opcode = MI.getOpcode();
  if (TII->isSDWA(Opcode)) {
    SDWAOpcode = Opcode;
  } else {
    SDWAOpcode = AMDGPU::getSDWAOp(Opcode);
    if (SDWAOpcode == -1)
      SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(Opcode));
  }
  assert(SDWAOpcode != -1);

  const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);

  // Create SDWA version of instruction MI and initialize its operands
  MachineInstrBuilder SDWAInst =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);

  // Copy dst: if it is present in the original instruction then it should also
  // be present in the SDWA one
  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
  if (Dst) {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
    SDWAInst.add(*Dst);
  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
    assert(Dst &&
           AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
    SDWAInst.add(*Dst);
  } else {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
    SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
  }

  // Copy src0 and initialize src0_modifiers. All SDWA instructions have src0
  // and src0_modifiers (except for v_nop_sdwa, but it can't get here)
  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  assert(
      Src0 &&
      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
  if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers))
    SDWAInst.addImm(Mod->getImm());
  else
    SDWAInst.addImm(0);
  SDWAInst.add(*Src0);

  // Copy src1 if present, initialize src1_modifiers.
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
  if (Src1) {
    assert(
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
        AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
    if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers))
      SDWAInst.addImm(Mod->getImm());
    else
      SDWAInst.addImm(0);
    SDWAInst.add(*Src1);
  }

  if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
      SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
    // v_mac_f16/32 has an additional src2 operand tied to vdst
    MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
    assert(Src2);
    SDWAInst.add(*Src2);
  }

  // Copy clamp if present, initialize otherwise
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
  MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp);
  if (Clamp) {
    SDWAInst.add(*Clamp);
  } else {
    SDWAInst.addImm(0);
  }

  // Copy omod if present, initialize otherwise if needed
  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
    MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
    if (OMod) {
      SDWAInst.add(*OMod);
    } else {
      SDWAInst.addImm(0);
    }
  }

  // Copy dst_sel if present, initialize otherwise if needed
  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
    MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
    if (DstSel) {
      SDWAInst.add(*DstSel);
    } else {
      SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
    }
  }

  // Copy dst_unused if present, initialize otherwise if needed
  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
    MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
    if (DstUnused) {
      SDWAInst.add(*DstUnused);
    } else {
      SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
    }
  }

  // Copy src0_sel if present, initialize otherwise
  assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
  MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
  if (Src0Sel) {
    SDWAInst.add(*Src0Sel);
  } else {
    SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
  }

  // Copy src1_sel if present, initialize otherwise if needed
  if (Src1) {
    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
    MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
    if (Src1Sel) {
      SDWAInst.add(*Src1Sel);
    } else {
      SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
    }
  }

  // Apply all sdwa operand patterns.
  bool Converted = false;
  for (auto &Operand : SDWAOperands) {
    // There should be no intersection between SDWA operands and potential MIs
    // e.g.:
    // v_and_b32 v0, 0xff, v1 -> src:v1 sel:BYTE_0
    // v_and_b32 v2, 0xff, v0 -> src:v0 sel:BYTE_0
    // v_add_u32 v3, v4, v2
    //
    // In that example it is possible that we would fold the 2nd instruction
    // into the 3rd (v_add_u32_sdwa) and then try to fold the 1st instruction
    // into the 2nd (which was already destroyed). So if an SDWAOperand is also
    // a potential MI then do not apply it.
    if (PotentialMatches.count(Operand->getParentInst()) == 0)
      Converted |= Operand->convertToSDWA(*SDWAInst, TII);
  }
  if (Converted) {
    ConvertedInstructions.push_back(SDWAInst);
  } else {
    SDWAInst->eraseFromParent();
    return false;
  }

  DEBUG(dbgs() << "Convert instruction:" << MI
               << "Into:" << *SDWAInst << '\n');
  ++NumSDWAInstructionsPeepholed;

  MI.eraseFromParent();
  return true;
}


// If an instruction was converted to SDWA it should not have immediates or SGPR
// operands (one SGPR operand is allowed on GFX9). Copy its scalar operands into
// VGPRs.
void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
                                            const SISubtarget &ST) const {
  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
  unsigned ConstantBusCount = 0;
  for (MachineOperand &Op : MI.explicit_uses()) {
    if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
      continue;

    unsigned I = MI.getOperandNo(&Op);
    if (Desc.OpInfo[I].RegClass == -1 ||
        !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
      continue;

    if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
        TRI->isSGPRReg(*MRI, Op.getReg())) {
      ++ConstantBusCount;
      continue;
    }

    unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                        TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
    if (Op.isImm())
      Copy.addImm(Op.getImm());
    else if (Op.isReg())
      Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
                  Op.getSubReg());
    Op.ChangeToRegister(VGPR, false);
  }
}

bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();

  if (!ST.hasSDWA() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST.getRegisterInfo();
  TII = ST.getInstrInfo();

  // Find all SDWA operands in MF.
  bool Ret = false;
  for (MachineBasicBlock &MBB : MF) {
    bool Changed = false;
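    // Repeat matching and conversion within the block until no new SDWA
    // instructions are produced: a successful conversion can expose further
    // patterns (e.g. the dst-preserve v_or_b32 pattern only matches once its
    // inputs have been converted to SDWA).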
    do {
      matchSDWAOperands(MBB);

      for (const auto &OperandPair : SDWAOperands) {
        const auto &Operand = OperandPair.second;
        MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
        if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) {
          PotentialMatches[PotentialMI].push_back(Operand.get());
        }
      }

      for (auto &PotentialPair : PotentialMatches) {
        MachineInstr &PotentialMI = *PotentialPair.first;
        convertToSDWA(PotentialMI, PotentialPair.second);
      }

      PotentialMatches.clear();
      SDWAOperands.clear();

      Changed = !ConvertedInstructions.empty();

      if (Changed)
        Ret = true;
      while (!ConvertedInstructions.empty())
        legalizeScalarOperands(*ConvertedInstructions.pop_back_val(), ST);
    } while (Changed);
  }

  return Ret;
}