//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
/// Currently it converts GLOBAL_{LOAD|STORE}_* and GLOBAL_Atomic_*
/// instructions into their _SADDR variants, feeding the SGPR into the
/// saddr field of the new instruction. We currently handle a REG_SEQUENCE
/// feeding the vaddr, and decompose it into a base and an index.
///
/// Transform:
/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
///                                    %24:vgpr_32, %19:sreg_64_xexec
/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
/// %11:vreg_64 = COPY %16:vreg_64
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
/// Into:
/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec, 16...
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h" // Needed explicitly for the cl::opt below.
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fixup-vector-isel"

using namespace llvm;

static cl::opt<bool> EnableGlobalSGPRAddr(
    "amdgpu-enable-global-sgpr-addr",
    cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
    cl::init(false));
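// A hypothetical invocation exercising the conversion (input file names are
// placeholders):
//   llc -march=amdgcn -mcpu=gfx900 -amdgpu-enable-global-sgpr-addr < test.ll
// or, on MIR input, running just this pass:
//   llc -march=amdgcn -run-pass=si-fixup-vector-isel \
//       -amdgpu-enable-global-sgpr-addr test.mir -o -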

STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
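// Both counters are reported under llc's -stats flag; NumSGPRGlobalSaddrs
// counts the subset of the opportunities that were actually rewritten.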

namespace {

class SIFixupVectorISel : public MachineFunctionPass {
public:
  static char ID;

  SIFixupVectorISel() : MachineFunctionPass(ID) {
    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
                "SI Fixup Vector ISel", false, false)

char SIFixupVectorISel::ID = 0;

char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;

FunctionPass *llvm::createSIFixupVectorISelPass() {
  return new SIFixupVectorISel();
}

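// Walk the def chain feeding a global memop's vaddr operand, looking through
// COPYs and REG_SEQUENCEs for the 64-bit add (a V_ADD_I32_e64 /
// V_ADDC_U32_e64 pair lowered from an ADD_64_PSEUDO) whose inputs give a
// 64-bit SGPR base and a 64-bit VGPR index. On success, sets BaseReg and
// IndexReg and returns true.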
static bool findSRegBaseAndIndex(MachineOperand *Op,
                                 unsigned &BaseReg,
                                 unsigned &IndexReg,
                                 MachineRegisterInfo &MRI,
                                 const SIRegisterInfo *TRI) {
  SmallVector<MachineOperand *, 8> Worklist;
  Worklist.push_back(Op);
  while (!Worklist.empty()) {
    MachineOperand *WOp = Worklist.pop_back_val();
    if (!WOp->isReg() ||
        !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
      continue;
    MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
    switch (DefInst->getOpcode()) {
    default:
      continue;
    case AMDGPU::COPY:
      Worklist.push_back(&DefInst->getOperand(1));
      break;
    case AMDGPU::REG_SEQUENCE:
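      // A 64-bit REG_SEQUENCE has exactly five operands: the def plus a
      // (value, subreg-index) pair for each 32-bit half.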
      if (DefInst->getNumOperands() != 5)
        continue;
      Worklist.push_back(&DefInst->getOperand(1));
      Worklist.push_back(&DefInst->getOperand(3));
      break;
    case AMDGPU::V_ADD_I32_e64:
      // The V_ADD_* and its analogous V_ADDC_* are generated by a previous
      // pass which lowered an ADD_64_PSEUDO, which generates subregs to
      // break up the 64-bit args.
      if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      BaseReg = DefInst->getOperand(2).getReg();
      if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      IndexReg = DefInst->getOperand(3).getReg();
      // Chase the IndexReg.
      MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the reg class is 64-bit for Index.
      // If the Index register is a subreg, we want it to reference
      // a 64-bit register which we will use as the Index reg.
      const TargetRegisterClass *IdxRC, *BaseRC;
      IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
      if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
        continue;
      IndexReg = MI->getOperand(1).getReg();
      // Chase the BaseReg.
      MI = MRI.getUniqueVRegDef(BaseReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64-bit for Base.
      BaseReg = MI->getOperand(1).getReg();
      BaseRC = MRI.getRegClass(BaseReg);
      if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
        continue;
      // Make sure Base is an SGPR and Index is a VGPR.
      if (!TRI->isSGPRReg(MRI, BaseReg))
        return false;
      if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
        return false;
      // Clear any kill flags on the Index and Base regs; both are reused
      // later when building the new instruction.
      MRI.clearKillFlags(IndexReg);
      MRI.clearKillFlags(BaseReg);
      return true;
    }
  }
  return false;
}

// Identify global LOAD, STORE, and ATOMIC instructions and try to convert
// them to their _SADDR variants.
static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
                             MachineFunction &MF,
                             MachineRegisterInfo &MRI,
                             const GCNSubtarget &ST,
                             const SIInstrInfo *TII,
                             const SIRegisterInfo *TRI) {
  if (!EnableGlobalSGPRAddr)
    return false;
  bool FuncModified = false;
  MachineBasicBlock::iterator I, Next;
  for (I = MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MI = *I;
    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
    if (NewOpcd < 0)
      continue;
    // Update our statistics on opportunities seen.
    ++NumSGPRGlobalOccurs;
    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
    // We need a Base and an Index, or we can't transform to _SADDR.
    unsigned BaseReg = 0;
    unsigned IndexReg = 0;
    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
      continue;
    ++NumSGPRGlobalSaddrs;
    FuncModified = true;
    // Create the new _SADDR memory instruction.
    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
    MachineInstr *NewGlob =
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
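    // Operand order for the _SADDR forms is: (vdst,) vaddr, (vdata,) saddr,
    // offset, then the cache-policy bits; the VGPR index feeds vaddr and
    // the SGPR base feeds saddr.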
    if (HasVdst)
      NewGlob->addOperand(MF, MI.getOperand(0));
    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
    if (VData)
      NewGlob->addOperand(MF, *VData);
    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));

    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
    // Atomics don't have a GLC operand, so omit the field if it's not there.
    if (Glc)
      NewGlob->addOperand(MF, *Glc);
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
    // _D16 variants have a vdst_in operand; copy it in.
    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
                                      AMDGPU::OpName::vdst_in);
    if (VDstInOp)
      NewGlob->addOperand(MF, *VDstInOp);
    NewGlob->copyImplicitOps(MF, MI);
    NewGlob->cloneMemRefs(MF, MI);
    // Remove the old global memop instruction.
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
  }
  return FuncModified;
}

bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool FuncModified = false;
  for (MachineBasicBlock &MBB : MF) {
    // Clean up missed SADDR opportunities from ISel.
    FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
  }
  return FuncModified;
}