//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
/// Currently it converts GLOBAL_{LOAD|STORE}_* and GLOBAL_ATOMIC_*
/// instructions into their _SADDR variants, feeding the sreg into the
/// saddr field of the new instruction. It currently handles a
/// REG_SEQUENCE feeding the vaddr and decomposes it into a base and
/// an index.
///
/// Transform:
/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
///                                    %24:vgpr_32, %19:sreg_64_xexec
/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
/// %11:vreg_64 = COPY %16:vreg_64
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
/// Into:
/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec, 16...
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fixup-vector-isel"

using namespace llvm;

STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");

namespace {

class SIFixupVectorISel : public MachineFunctionPass {
public:
  static char ID;

  SIFixupVectorISel() : MachineFunctionPass(ID) {
    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
                "SI Fixup Vector ISel", false, false)

char SIFixupVectorISel::ID = 0;

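// Expose the pass ID so the target pass pipeline can add this pass by ID.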
char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;

FunctionPass *llvm::createSIFixupVectorISelPass() {
  return new SIFixupVectorISel();
}

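// Walk backward from a global memop's vaddr operand, looking through COPY
// and REG_SEQUENCE, for a V_ADD_I32_e64 whose operands supply a 64-bit SGPR
// base and a 64-bit VGPR index. On success, returns true with BaseReg and
// IndexReg set; otherwise returns false.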
static bool findSRegBaseAndIndex(MachineOperand *Op,
                                 unsigned &BaseReg,
                                 unsigned &IndexReg,
                                 MachineRegisterInfo &MRI,
                                 const SIRegisterInfo *TRI) {
  SmallVector<MachineOperand *, 8> Worklist;
  Worklist.push_back(Op);
  while (!Worklist.empty()) {
    MachineOperand *WOp = Worklist.pop_back_val();
    if (!WOp->isReg() ||
        !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
      continue;
    MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
    switch (DefInst->getOpcode()) {
    default:
      continue;
    case AMDGPU::COPY:
      Worklist.push_back(&DefInst->getOperand(1));
      break;
    case AMDGPU::REG_SEQUENCE:
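      // Only handle the simple two-element sequence
      // (dst, reg0, sub0, reg1, sub1) that builds a 64-bit pair.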
      if (DefInst->getNumOperands() != 5)
        continue;
      Worklist.push_back(&DefInst->getOperand(1));
      Worklist.push_back(&DefInst->getOperand(3));
      break;
    case AMDGPU::V_ADD_I32_e64:
      // The V_ADD_* and its analogous V_ADDC_* are generated by
      // a previous pass which lowered from an ADD_64_PSEUDO,
      // which generates subregs to break up the 64 bit args.
      if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      BaseReg = DefInst->getOperand(2).getReg();
      if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      IndexReg = DefInst->getOperand(3).getReg();
      // Chase the IndexReg.
      MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64 bit for Index.
      // If the Index register is a subreg, we want it to reference
      // a 64 bit register which we will use as the Index reg.
      const TargetRegisterClass *IdxRC, *BaseRC;
      IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
      if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
        continue;
      IndexReg = MI->getOperand(1).getReg();
      // Chase the BaseReg.
      MI = MRI.getUniqueVRegDef(BaseReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64 bit for Base.
      BaseReg = MI->getOperand(1).getReg();
      BaseRC = MRI.getRegClass(BaseReg);
      if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
        continue;
      // Make sure Base is an SReg and Index is a VReg.
      if (!TRI->isSGPRReg(MRI, BaseReg))
        return false;
      if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
        return false;
      // Clear any kill flags on the Index and Base regs; they are used later.
      MRI.clearKillFlags(IndexReg);
      MRI.clearKillFlags(BaseReg);
      return true;
    }
  }
  return false;
}

// Identify global LOAD|STORE/ATOMIC instructions and try to convert them to
// their _SADDR form.
static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
                             MachineFunction &MF,
                             MachineRegisterInfo &MRI,
                             const GCNSubtarget &ST,
                             const SIInstrInfo *TII,
                             const SIRegisterInfo *TRI) {
  bool FuncModified = false;
  MachineBasicBlock::iterator I, Next;
  for (I = MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MI = *I;
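    // getGlobalSaddrOp maps a global memory opcode to its _SADDR variant,
    // or returns -1 when no such variant exists.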
    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
    if (NewOpcd < 0)
      continue;
    // Update our statistics on opportunities seen.
    ++NumSGPRGlobalOccurs;
    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
    // We need a Base and an Index, or we can't transform to _SADDR.
    unsigned BaseReg = 0;
    unsigned IndexReg = 0;
    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
      continue;
    ++NumSGPRGlobalSaddrs;
    FuncModified = true;
    // Create the new _SADDR memory instruction.
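    // _SADDR operand order: [vdst,] vaddr (the VGPR index), [vdata,]
    // saddr (the SGPR base), offset, [glc,] slc[, vdst_in].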
    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
    MachineInstr *NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
    if (HasVdst)
      NewGlob->addOperand(MF, MI.getOperand(0));
    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
    if (VData)
      NewGlob->addOperand(MF, *VData);
    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));

    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
    // Atomics don't have a GLC operand, so omit the field if it isn't there.
    if (Glc)
      NewGlob->addOperand(MF, *Glc);
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
    // _D16 variants have a vdst_in operand; copy it over.
    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
                                   AMDGPU::OpName::vdst_in);
    if (VDstInOp)
      NewGlob->addOperand(MF, *VDstInOp);
    NewGlob->copyImplicitOps(MF, MI);
    NewGlob->cloneMemRefs(MF, MI);
    // Remove the old Global Memop instruction.
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
  }
  return FuncModified;
}

bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool FuncModified = false;
  for (MachineBasicBlock &MBB : MF) {
    // Clean up missed SADDR opportunities from ISel.
    FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
  }
  return FuncModified;
}