//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
/// Currently it converts GLOBAL_{LOAD|STORE}_* and GLOBAL_ATOMIC_*
/// instructions into their _SADDR variants, feeding the sreg into the
/// saddr field of the new instruction. We currently handle a REG_SEQUENCE
/// feeding the vaddr and decompose it into a base and an index.
///
/// Transform:
/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
///                                    %24:vgpr_32, %19:sreg_64_xexec
/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
/// %11:vreg_64 = COPY %16:vreg_64
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
/// Into:
/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec, 16...
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fixup-vector-isel"

using namespace llvm;

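// The conversion is off by default. Since this is a cl::opt, the flag is
// parsed by the standard LLVM tools and can be enabled from the llc command
// line, e.g. (illustrative invocation):
//   llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-enable-global-sgpr-addr in.ll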
static cl::opt<bool> EnableGlobalSGPRAddr(
    "amdgpu-enable-global-sgpr-addr",
    cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
    cl::init(false));

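// The counters below are reported through the generic statistics machinery,
// e.g. by adding -stats to the llc invocation above (in builds with
// statistics enabled).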
STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");

namespace {

class SIFixupVectorISel : public MachineFunctionPass {
public:
  static char ID;

  SIFixupVectorISel() : MachineFunctionPass(ID) {
    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
                "SI Fixup Vector ISel", false, false)
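// INITIALIZE_PASS registers the pass under the DEBUG_TYPE string
// "si-fixup-vector-isel"; for example, it should be possible to exercise it
// in isolation on MIR input with llc -run-pass=si-fixup-vector-isel
// (illustrative invocation, exact pipelines vary).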

char SIFixupVectorISel::ID = 0;

char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;

FunctionPass *llvm::createSIFixupVectorISelPass() {
  return new SIFixupVectorISel();
}

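// Walk back from a flat/global vaddr operand, looking through COPYs and a
// two-input REG_SEQUENCE to the V_ADD_I32_e64 produced when an ADD_64_PSEUDO
// was lowered, and try to recover a 64-bit SGPR base register and a 64-bit
// VGPR index register. On success, kill flags on both registers are cleared
// (the caller reuses them) and true is returned.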
static bool findSRegBaseAndIndex(MachineOperand *Op,
                                 unsigned &BaseReg,
                                 unsigned &IndexReg,
                                 MachineRegisterInfo &MRI,
                                 const SIRegisterInfo *TRI) {
  SmallVector<MachineOperand *, 8> Worklist;
  Worklist.push_back(Op);
  while (!Worklist.empty()) {
    MachineOperand *WOp = Worklist.pop_back_val();
    if (!WOp->isReg() ||
        !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
      continue;
    MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
    switch (DefInst->getOpcode()) {
    default:
      continue;
    case AMDGPU::COPY:
      Worklist.push_back(&DefInst->getOperand(1));
      break;
    case AMDGPU::REG_SEQUENCE:
      if (DefInst->getNumOperands() != 5)
        continue;
      Worklist.push_back(&DefInst->getOperand(1));
      Worklist.push_back(&DefInst->getOperand(3));
      break;
    case AMDGPU::V_ADD_I32_e64:
      // The V_ADD_* and its analogous V_ADDC_* are generated by
      // a previous pass which lowered from an ADD_64_PSEUDO,
      // which generates subregs to break up the 64-bit args.
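      // In terms of the header example, the pattern matched here is
      // (register names purely illustrative):
      //   %lo:vgpr_32, %carry:sreg_64_xexec =
      //       V_ADD_I32_e64 %base_lo:sgpr_32, %idx_lo:vgpr_32
      // so operand 2 is expected to be the scalar base and operand 3 the
      // vector index; both are chased through COPYs to 64-bit registers
      // below.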
      if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      BaseReg = DefInst->getOperand(2).getReg();
      if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      IndexReg = DefInst->getOperand(3).getReg();
      // Chase the IndexReg.
      MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the reg class is 64-bit for Index.
      // If the Index register is a subreg, we want it to reference
      // a 64-bit register which we will use as the Index reg.
      const TargetRegisterClass *IdxRC, *BaseRC;
      IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
      if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
        continue;
      IndexReg = MI->getOperand(1).getReg();
      // Chase the BaseReg.
      MI = MRI.getUniqueVRegDef(BaseReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64-bit for Base.
      BaseReg = MI->getOperand(1).getReg();
      BaseRC = MRI.getRegClass(BaseReg);
      if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
        continue;
      // Make sure Base is an SReg and Index is a VReg.
      if (!TRI->isSGPRReg(MRI, BaseReg))
        return false;
      if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
        return false;
      // Clear any kill flags on the Index and Base regs; they are used later.
      MRI.clearKillFlags(IndexReg);
      MRI.clearKillFlags(BaseReg);
      return true;
    }
  }
  return false;
}

// Identify global LOAD, STORE and ATOMIC instructions and try to convert
// them to the _SADDR form.
static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
                             MachineFunction &MF,
                             MachineRegisterInfo &MRI,
                             const GCNSubtarget &ST,
                             const SIInstrInfo *TII,
                             const SIRegisterInfo *TRI) {
  if (!EnableGlobalSGPRAddr)
    return false;
  bool FuncModified = false;
  MachineBasicBlock::iterator I, Next;
  for (I = MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MI = *I;
    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
    if (NewOpcd < 0)
      continue;
    // Update our statistics on opportunities seen.
    ++NumSGPRGlobalOccurs;
    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
    // Need a Base and an Index or we can't transform to _SADDR.
    unsigned BaseReg = 0;
    unsigned IndexReg = 0;
    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
      continue;
    ++NumSGPRGlobalSaddrs;
    FuncModified = true;
    // Create the new _SADDR memory instruction.
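    // Operands are appended in the order the new _SADDR instruction takes
    // them: [vdst,] vaddr (the VGPR index), [vdata,] saddr (the SGPR base),
    // offset, [glc,] slc [, vdst_in]; the bracketed pieces only exist on
    // some of the LOAD/STORE/ATOMIC/_D16 variants.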
    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
    MachineInstr *NewGlob =
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
    if (HasVdst)
      NewGlob->addOperand(MF, MI.getOperand(0));
    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
    if (VData)
      NewGlob->addOperand(MF, *VData);
    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));

    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
    // Atomics don't have a GLC operand, so omit the field if it is absent.
    if (Glc)
      NewGlob->addOperand(MF, *Glc);
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
    // _D16 variants have a vdst_in operand; copy it over.
    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
                                      AMDGPU::OpName::vdst_in);
    if (VDstInOp)
      NewGlob->addOperand(MF, *VDstInOp);
    NewGlob->copyImplicitOps(MF, MI);
    NewGlob->cloneMemRefs(MF, MI);
    // Remove the old global memory instruction.
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
  }
  return FuncModified;
}

bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool FuncModified = false;
  for (MachineBasicBlock &MBB : MF) {
    // Clean up missed SADDR opportunities from ISel.
    FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
  }
  return FuncModified;
}