Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 1 | //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0 |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 9 | // operand. If any of the use instructions cannot be combined with the mov, the |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 10 | // whole sequence is reverted. |
| 11 | // |
| 12 | // $old = ... |
| 13 | // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 14 | // dpp_controls..., $row_mask, $bank_mask, $bound_ctrl |
| 15 | // $res = VALU $dpp_value [, src1] |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 16 | // |
| 17 | // to |
| 18 | // |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 19 | // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,] |
| 20 | // dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 21 | // |
| 22 | // Combining rules : |
| 23 | // |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 24 | // if $row_mask and $bank_mask are fully enabled (0xF) and |
| 25 | // $bound_ctrl==DPP_BOUND_ZERO or $old==0 |
| 26 | // -> $combined_old = undef, |
| 27 | // $combined_bound_ctrl = DPP_BOUND_ZERO |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 28 | // |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 29 | // if the VALU op is binary and |
| 30 | // $bound_ctrl==DPP_BOUND_OFF and |
| 31 | // $old==identity value (immediate) for the VALU op |
| 32 | // -> $combined_old = src1, |
| 33 | // $combined_bound_ctrl = DPP_BOUND_OFF |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 34 | // |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 35 | // Otherwise cancel. |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 36 | // |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 37 | // The mov_dpp instruction should reside in the same BB as all its uses |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 38 | //===----------------------------------------------------------------------===// |
| 39 | |
| 40 | #include "AMDGPU.h" |
| 41 | #include "AMDGPUSubtarget.h" |
| 42 | #include "SIInstrInfo.h" |
| 43 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 44 | #include "llvm/ADT/SmallVector.h" |
| 45 | #include "llvm/ADT/Statistic.h" |
| 46 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 47 | #include "llvm/CodeGen/MachineFunction.h" |
| 48 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 49 | #include "llvm/CodeGen/MachineInstr.h" |
| 50 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 51 | #include "llvm/CodeGen/MachineOperand.h" |
| 52 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 53 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
| 54 | #include "llvm/Pass.h" |
| 55 | #include <cassert> |
| 56 | |
| 57 | using namespace llvm; |
| 58 | |
| 59 | #define DEBUG_TYPE "gcn-dpp-combine" |
| 60 | |
| 61 | STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined."); |
| 62 | |
| 63 | namespace { |
| 64 | |
| 65 | class GCNDPPCombine : public MachineFunctionPass { |
| 66 | MachineRegisterInfo *MRI; |
| 67 | const SIInstrInfo *TII; |
| 68 | |
| 69 | using RegSubRegPair = TargetInstrInfo::RegSubRegPair; |
| 70 | |
| 71 | MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const; |
| 72 | |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 73 | MachineInstr *createDPPInst(MachineInstr &OrigMI, |
| 74 | MachineInstr &MovMI, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 75 | RegSubRegPair CombOldVGPR, |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 76 | MachineOperand *OldOpnd, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 77 | bool CombBCZ) const; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 78 | |
| 79 | MachineInstr *createDPPInst(MachineInstr &OrigMI, |
| 80 | MachineInstr &MovMI, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 81 | RegSubRegPair CombOldVGPR, |
| 82 | bool CombBCZ) const; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 83 | |
| 84 | bool hasNoImmOrEqual(MachineInstr &MI, |
| 85 | unsigned OpndName, |
| 86 | int64_t Value, |
| 87 | int64_t Mask = -1) const; |
| 88 | |
| 89 | bool combineDPPMov(MachineInstr &MI) const; |
| 90 | |
| 91 | public: |
| 92 | static char ID; |
| 93 | |
| 94 | GCNDPPCombine() : MachineFunctionPass(ID) { |
| 95 | initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry()); |
| 96 | } |
| 97 | |
| 98 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 99 | |
| 100 | StringRef getPassName() const override { return "GCN DPP Combine"; } |
| 101 | |
| 102 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 103 | AU.setPreservesCFG(); |
| 104 | MachineFunctionPass::getAnalysisUsage(AU); |
| 105 | } |
| 106 | }; |
| 107 | |
| 108 | } // end anonymous namespace |
| 109 | |
| 110 | INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false) |
| 111 | |
| 112 | char GCNDPPCombine::ID = 0; |
| 113 | |
| 114 | char &llvm::GCNDPPCombineID = GCNDPPCombine::ID; |
| 115 | |
| 116 | FunctionPass *llvm::createGCNDPPCombinePass() { |
| 117 | return new GCNDPPCombine(); |
| 118 | } |
| 119 | |
| 120 | static int getDPPOp(unsigned Op) { |
| 121 | auto DPP32 = AMDGPU::getDPPOp32(Op); |
| 122 | if (DPP32 != -1) |
| 123 | return DPP32; |
| 124 | |
| 125 | auto E32 = AMDGPU::getVOPe32(Op); |
| 126 | return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1; |
| 127 | } |
| 128 | |
| 129 | // tracks the register operand definition and returns: |
| 130 | // 1. immediate operand used to initialize the register if found |
| 131 | // 2. nullptr if the register operand is undef |
| 132 | // 3. the operand itself otherwise |
| 133 | MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const { |
| 134 | auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI); |
| 135 | if (!Def) |
| 136 | return nullptr; |
| 137 | |
| 138 | switch(Def->getOpcode()) { |
| 139 | default: break; |
| 140 | case AMDGPU::IMPLICIT_DEF: |
| 141 | return nullptr; |
| 142 | case AMDGPU::COPY: |
| 143 | case AMDGPU::V_MOV_B32_e32: { |
| 144 | auto &Op1 = Def->getOperand(1); |
| 145 | if (Op1.isImm()) |
| 146 | return &Op1; |
| 147 | break; |
| 148 | } |
| 149 | } |
| 150 | return &OldOpnd; |
| 151 | } |
| 152 | |
| 153 | MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, |
| 154 | MachineInstr &MovMI, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 155 | RegSubRegPair CombOldVGPR, |
| 156 | bool CombBCZ) const { |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 157 | assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp); |
| 158 | assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() == |
| 159 | TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg()); |
| 160 | |
| 161 | auto OrigOp = OrigMI.getOpcode(); |
| 162 | auto DPPOp = getDPPOp(OrigOp); |
| 163 | if (DPPOp == -1) { |
| 164 | LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n"); |
| 165 | return nullptr; |
| 166 | } |
| 167 | |
| 168 | auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, |
| 169 | OrigMI.getDebugLoc(), TII->get(DPPOp)); |
| 170 | bool Fail = false; |
| 171 | do { |
| 172 | auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst); |
| 173 | assert(Dst); |
| 174 | DPPInst.add(*Dst); |
| 175 | int NumOperands = 1; |
| 176 | |
| 177 | const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old); |
| 178 | if (OldIdx != -1) { |
| 179 | assert(OldIdx == NumOperands); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 180 | assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)); |
| 181 | DPPInst.addReg(CombOldVGPR.Reg, 0, CombOldVGPR.SubReg); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 182 | ++NumOperands; |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 183 | } else { |
| 184 | // TODO: this discards MAC/FMA instructions for now, let's add it later |
| 185 | LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction," |
| 186 | " TBD\n"); |
| 187 | Fail = true; |
| 188 | break; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 189 | } |
| 190 | |
| 191 | if (auto *Mod0 = TII->getNamedOperand(OrigMI, |
| 192 | AMDGPU::OpName::src0_modifiers)) { |
| 193 | assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, |
| 194 | AMDGPU::OpName::src0_modifiers)); |
| 195 | assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); |
| 196 | DPPInst.addImm(Mod0->getImm()); |
| 197 | ++NumOperands; |
| 198 | } |
| 199 | auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); |
| 200 | assert(Src0); |
| 201 | if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) { |
| 202 | LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n"); |
| 203 | Fail = true; |
| 204 | break; |
| 205 | } |
| 206 | DPPInst.add(*Src0); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 207 | DPPInst->getOperand(NumOperands).setIsKill(false); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 208 | ++NumOperands; |
| 209 | |
| 210 | if (auto *Mod1 = TII->getNamedOperand(OrigMI, |
| 211 | AMDGPU::OpName::src1_modifiers)) { |
| 212 | assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, |
| 213 | AMDGPU::OpName::src1_modifiers)); |
| 214 | assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))); |
| 215 | DPPInst.addImm(Mod1->getImm()); |
| 216 | ++NumOperands; |
| 217 | } |
| 218 | if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { |
| 219 | if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) { |
| 220 | LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n"); |
| 221 | Fail = true; |
| 222 | break; |
| 223 | } |
| 224 | DPPInst.add(*Src1); |
| 225 | ++NumOperands; |
| 226 | } |
| 227 | |
| 228 | if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) { |
| 229 | if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) { |
| 230 | LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n"); |
| 231 | Fail = true; |
| 232 | break; |
| 233 | } |
| 234 | DPPInst.add(*Src2); |
| 235 | } |
| 236 | |
| 237 | DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl)); |
| 238 | DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask)); |
| 239 | DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask)); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 240 | DPPInst.addImm(CombBCZ ? 1 : 0); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 241 | } while (false); |
| 242 | |
| 243 | if (Fail) { |
| 244 | DPPInst.getInstr()->eraseFromParent(); |
| 245 | return nullptr; |
| 246 | } |
| 247 | LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr()); |
| 248 | return DPPInst.getInstr(); |
| 249 | } |
| 250 | |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 251 | static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) { |
| 252 | assert(OldOpnd->isImm()); |
| 253 | switch (OrigMIOp) { |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 254 | default: break; |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 255 | case AMDGPU::V_ADD_U32_e32: |
| 256 | case AMDGPU::V_ADD_I32_e32: |
| 257 | case AMDGPU::V_OR_B32_e32: |
| 258 | case AMDGPU::V_SUBREV_U32_e32: |
| 259 | case AMDGPU::V_SUBREV_I32_e32: |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 260 | case AMDGPU::V_MAX_U32_e32: |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 261 | case AMDGPU::V_XOR_B32_e32: |
| 262 | if (OldOpnd->getImm() == 0) |
| 263 | return true; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 264 | break; |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 265 | case AMDGPU::V_AND_B32_e32: |
| 266 | case AMDGPU::V_MIN_U32_e32: |
| 267 | if (static_cast<uint32_t>(OldOpnd->getImm()) == |
| 268 | std::numeric_limits<uint32_t>::max()) |
| 269 | return true; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 270 | break; |
| 271 | case AMDGPU::V_MIN_I32_e32: |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 272 | if (static_cast<int32_t>(OldOpnd->getImm()) == |
| 273 | std::numeric_limits<int32_t>::max()) |
| 274 | return true; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 275 | break; |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 276 | case AMDGPU::V_MAX_I32_e32: |
| 277 | if (static_cast<int32_t>(OldOpnd->getImm()) == |
| 278 | std::numeric_limits<int32_t>::min()) |
| 279 | return true; |
| 280 | break; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 281 | case AMDGPU::V_MUL_I32_I24_e32: |
| 282 | case AMDGPU::V_MUL_U32_U24_e32: |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 283 | if (OldOpnd->getImm() == 1) |
| 284 | return true; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 285 | break; |
| 286 | } |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 287 | return false; |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 288 | } |
| 289 | |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 290 | MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, |
| 291 | MachineInstr &MovMI, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 292 | RegSubRegPair CombOldVGPR, |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 293 | MachineOperand *OldOpndValue, |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 294 | bool CombBCZ) const { |
| 295 | assert(CombOldVGPR.Reg); |
| 296 | if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) { |
| 297 | auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); |
| 298 | if (!Src1 || !Src1->isReg()) { |
| 299 | LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n"); |
| 300 | return nullptr; |
| 301 | } |
| 302 | if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) { |
| 303 | LLVM_DEBUG(dbgs() << " failed: old immediate ins't an identity\n"); |
| 304 | return nullptr; |
| 305 | } |
| 306 | CombOldVGPR = getRegSubRegPair(*Src1); |
| 307 | if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) { |
| 308 | LLVM_DEBUG(dbgs() << " failed: src1 isn't a VGPR32 register\n"); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 309 | return nullptr; |
| 310 | } |
| 311 | } |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 312 | return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 313 | } |
| 314 | |
| 315 | // returns true if MI doesn't have OpndName immediate operand or the |
| 316 | // operand has Value |
| 317 | bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, |
| 318 | int64_t Value, int64_t Mask) const { |
| 319 | auto *Imm = TII->getNamedOperand(MI, OpndName); |
| 320 | if (!Imm) |
| 321 | return true; |
| 322 | |
| 323 | assert(Imm->isImm()); |
| 324 | return (Imm->getImm() & Mask) == Value; |
| 325 | } |
| 326 | |
| 327 | bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { |
| 328 | assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 329 | LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI); |
| 330 | |
| 331 | auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst); |
| 332 | assert(DstOpnd && DstOpnd->isReg()); |
| 333 | auto DPPMovReg = DstOpnd->getReg(); |
| 334 | if (!isEXECMaskConstantBetweenDefAndUses(DPPMovReg, *MRI)) { |
| 335 | LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" |
| 336 | " for all uses\n"); |
| 337 | return false; |
| 338 | } |
| 339 | |
| 340 | auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask); |
| 341 | assert(RowMaskOpnd && RowMaskOpnd->isImm()); |
| 342 | auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask); |
| 343 | assert(BankMaskOpnd && BankMaskOpnd->isImm()); |
| 344 | const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF && |
| 345 | BankMaskOpnd->getImm() == 0xF; |
| 346 | |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 347 | auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl); |
| 348 | assert(BCZOpnd && BCZOpnd->isImm()); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 349 | bool BoundCtrlZero = BCZOpnd->getImm(); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 350 | |
| 351 | auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old); |
| 352 | assert(OldOpnd && OldOpnd->isReg()); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 353 | |
| 354 | auto * const OldOpndValue = getOldOpndValue(*OldOpnd); |
| 355 | // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else |
| 356 | // We could use: assert(!OldOpndValue || OldOpndValue->isImm()) |
| 357 | // but the third option is used to distinguish undef from non-immediate |
| 358 | // to reuse IMPLICIT_DEF instruction later |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 359 | assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 360 | |
| 361 | bool CombBCZ = false; |
| 362 | |
| 363 | if (MaskAllLanes && BoundCtrlZero) { // [1] |
| 364 | CombBCZ = true; |
| 365 | } else { |
| 366 | if (!OldOpndValue || !OldOpndValue->isImm()) { |
| 367 | LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n"); |
| 368 | return false; |
| 369 | } |
| 370 | |
| 371 | if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) { |
| 372 | LLVM_DEBUG(dbgs() << |
| 373 | " failed: old reg def and mov should be in the same BB\n"); |
| 374 | return false; |
| 375 | } |
| 376 | |
| 377 | if (OldOpndValue->getImm() == 0) { |
| 378 | if (MaskAllLanes) { |
| 379 | assert(!BoundCtrlZero); // by check [1] |
| 380 | CombBCZ = true; |
Valery Pykhtin | 1e0b5c7 | 2019-01-09 13:43:32 +0000 | [diff] [blame] | 381 | } |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 382 | } else if (BoundCtrlZero) { |
| 383 | assert(!MaskAllLanes); // by check [1] |
| 384 | LLVM_DEBUG(dbgs() << |
| 385 | " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n"); |
| 386 | return false; |
Valery Pykhtin | 1e0b5c7 | 2019-01-09 13:43:32 +0000 | [diff] [blame] | 387 | } |
Valery Pykhtin | b7a4595 | 2019-01-09 15:21:53 +0000 | [diff] [blame] | 388 | } |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 389 | |
| 390 | LLVM_DEBUG(dbgs() << " old="; |
| 391 | if (!OldOpndValue) |
| 392 | dbgs() << "undef"; |
| 393 | else |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 394 | dbgs() << *OldOpndValue; |
| 395 | dbgs() << ", bound_ctrl=" << CombBCZ << '\n'); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 396 | |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 397 | SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs; |
| 398 | auto CombOldVGPR = getRegSubRegPair(*OldOpnd); |
| 399 | // try to reuse previous old reg if its undefined (IMPLICIT_DEF) |
| 400 | if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef |
| 401 | CombOldVGPR = RegSubRegPair( |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 402 | MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass)); |
| 403 | auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(), |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 404 | TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg); |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 405 | DPPMIs.push_back(UndefInst.getInstr()); |
| 406 | } |
| 407 | |
| 408 | OrigMIs.push_back(&MovMI); |
| 409 | bool Rollback = true; |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 410 | for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) { |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 411 | Rollback = true; |
| 412 | |
| 413 | auto &OrigMI = *Use.getParent(); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 414 | LLVM_DEBUG(dbgs() << " try: " << OrigMI); |
| 415 | |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 416 | auto OrigOp = OrigMI.getOpcode(); |
| 417 | if (TII->isVOP3(OrigOp)) { |
| 418 | if (!TII->hasVALU32BitEncoding(OrigOp)) { |
| 419 | LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n"); |
| 420 | break; |
| 421 | } |
| 422 | // check if other than abs|neg modifiers are set (opsel for example) |
| 423 | const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG); |
| 424 | if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) || |
| 425 | !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) || |
| 426 | !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) || |
| 427 | !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) { |
| 428 | LLVM_DEBUG(dbgs() << " failed: VOP3 has non-default modifiers\n"); |
| 429 | break; |
| 430 | } |
| 431 | } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) { |
| 432 | LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3\n"); |
| 433 | break; |
| 434 | } |
| 435 | |
| 436 | LLVM_DEBUG(dbgs() << " combining: " << OrigMI); |
| 437 | if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) { |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 438 | if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR, |
| 439 | OldOpndValue, CombBCZ)) { |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 440 | DPPMIs.push_back(DPPInst); |
| 441 | Rollback = false; |
| 442 | } |
| 443 | } else if (OrigMI.isCommutable() && |
| 444 | &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) { |
| 445 | auto *BB = OrigMI.getParent(); |
| 446 | auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI); |
| 447 | BB->insert(OrigMI, NewMI); |
| 448 | if (TII->commuteInstruction(*NewMI)) { |
| 449 | LLVM_DEBUG(dbgs() << " commuted: " << *NewMI); |
Valery Pykhtin | 7fe97f8 | 2019-02-08 11:59:48 +0000 | [diff] [blame] | 450 | if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR, |
| 451 | OldOpndValue, CombBCZ)) { |
Valery Pykhtin | 3d9afa2 | 2018-11-30 14:21:56 +0000 | [diff] [blame] | 452 | DPPMIs.push_back(DPPInst); |
| 453 | Rollback = false; |
| 454 | } |
| 455 | } else |
| 456 | LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n"); |
| 457 | NewMI->eraseFromParent(); |
| 458 | } else |
| 459 | LLVM_DEBUG(dbgs() << " failed: no suitable operands\n"); |
| 460 | if (Rollback) |
| 461 | break; |
| 462 | OrigMIs.push_back(&OrigMI); |
| 463 | } |
| 464 | |
| 465 | for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs)) |
| 466 | MI->eraseFromParent(); |
| 467 | |
| 468 | return !Rollback; |
| 469 | } |
| 470 | |
| 471 | bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { |
| 472 | auto &ST = MF.getSubtarget<GCNSubtarget>(); |
| 473 | if (!ST.hasDPP() || skipFunction(MF.getFunction())) |
| 474 | return false; |
| 475 | |
| 476 | MRI = &MF.getRegInfo(); |
| 477 | TII = ST.getInstrInfo(); |
| 478 | |
| 479 | assert(MRI->isSSA() && "Must be run on SSA"); |
| 480 | |
| 481 | bool Changed = false; |
| 482 | for (auto &MBB : MF) { |
| 483 | for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) { |
| 484 | auto &MI = *I++; |
| 485 | if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { |
| 486 | Changed = true; |
| 487 | ++NumDPPMovsCombined; |
| 488 | } |
| 489 | } |
| 490 | } |
| 491 | return Changed; |
| 492 | } |