|  | //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// Pass to pre-allocated WWM registers | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "SIInstrInfo.h" | 
|  | #include "SIRegisterInfo.h" | 
|  | #include "SIMachineFunctionInfo.h" | 
|  | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | 
|  | #include "llvm/ADT/PostOrderIterator.h" | 
|  | #include "llvm/CodeGen/VirtRegMap.h" | 
|  | #include "llvm/CodeGen/LiveInterval.h" | 
|  | #include "llvm/CodeGen/LiveIntervals.h" | 
|  | #include "llvm/CodeGen/LiveRegMatrix.h" | 
|  | #include "llvm/CodeGen/MachineDominators.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/RegisterClassInfo.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "si-pre-allocate-wwm-regs" | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class SIPreAllocateWWMRegs : public MachineFunctionPass { | 
|  | private: | 
|  | const SIInstrInfo *TII; | 
|  | const SIRegisterInfo *TRI; | 
|  | MachineRegisterInfo *MRI; | 
|  | LiveIntervals *LIS; | 
|  | LiveRegMatrix *Matrix; | 
|  | VirtRegMap *VRM; | 
|  | RegisterClassInfo RegClassInfo; | 
|  |  | 
|  | std::vector<unsigned> RegsToRewrite; | 
|  |  | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { | 
|  | initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<LiveIntervals>(); | 
|  | AU.addPreserved<LiveIntervals>(); | 
|  | AU.addRequired<VirtRegMap>(); | 
|  | AU.addRequired<LiveRegMatrix>(); | 
|  | AU.addPreserved<SlotIndexes>(); | 
|  | AU.setPreservesCFG(); | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | } | 
|  |  | 
|  | private: | 
|  | bool processDef(MachineOperand &MO); | 
|  | void rewriteRegs(MachineFunction &MF); | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace. | 
|  |  | 
|  | INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, | 
|  | "SI Pre-allocate WWM Registers", false, false) | 
|  | INITIALIZE_PASS_DEPENDENCY(LiveIntervals) | 
|  | INITIALIZE_PASS_DEPENDENCY(VirtRegMap) | 
|  | INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) | 
|  | INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, | 
|  | "SI Pre-allocate WWM Registers", false, false) | 
|  |  | 
|  | char SIPreAllocateWWMRegs::ID = 0; | 
|  |  | 
|  | char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; | 
|  |  | 
|  | FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { | 
|  | return new SIPreAllocateWWMRegs(); | 
|  | } | 
|  |  | 
|  | bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { | 
|  | if (!MO.isReg()) | 
|  | return false; | 
|  |  | 
|  | Register Reg = MO.getReg(); | 
|  |  | 
|  | if (!TRI->isVGPR(*MRI, Reg)) | 
|  | return false; | 
|  |  | 
|  | if (Register::isPhysicalRegister(Reg)) | 
|  | return false; | 
|  |  | 
|  | if (VRM->hasPhys(Reg)) | 
|  | return false; | 
|  |  | 
|  | LiveInterval &LI = LIS->getInterval(Reg); | 
|  |  | 
|  | for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { | 
|  | if (!MRI->isPhysRegUsed(PhysReg) && | 
|  | Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { | 
|  | Matrix->assign(LI, PhysReg); | 
|  | assert(PhysReg != 0); | 
|  | RegsToRewrite.push_back(Reg); | 
|  | return true; | 
|  | } | 
|  | } | 
|  |  | 
|  | llvm_unreachable("physreg not found for WWM expression"); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { | 
|  | for (MachineBasicBlock &MBB : MF) { | 
|  | for (MachineInstr &MI : MBB) { | 
|  | for (MachineOperand &MO : MI.operands()) { | 
|  | if (!MO.isReg()) | 
|  | continue; | 
|  |  | 
|  | const Register VirtReg = MO.getReg(); | 
|  | if (Register::isPhysicalRegister(VirtReg)) | 
|  | continue; | 
|  |  | 
|  | if (!VRM->hasPhys(VirtReg)) | 
|  | continue; | 
|  |  | 
|  | Register PhysReg = VRM->getPhys(VirtReg); | 
|  | const unsigned SubReg = MO.getSubReg(); | 
|  | if (SubReg != 0) { | 
|  | PhysReg = TRI->getSubReg(PhysReg, SubReg); | 
|  | MO.setSubReg(0); | 
|  | } | 
|  |  | 
|  | MO.setReg(PhysReg); | 
|  | MO.setIsRenamable(false); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | 
|  |  | 
|  | for (unsigned Reg : RegsToRewrite) { | 
|  | LIS->removeInterval(Reg); | 
|  |  | 
|  | const Register PhysReg = VRM->getPhys(Reg); | 
|  | assert(PhysReg != 0); | 
|  | MFI->ReserveWWMRegister(PhysReg); | 
|  | } | 
|  |  | 
|  | RegsToRewrite.clear(); | 
|  |  | 
|  | // Update the set of reserved registers to include WWM ones. | 
|  | MRI->freezeReservedRegs(MF); | 
|  | } | 
|  |  | 
|  | bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { | 
|  | LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); | 
|  |  | 
|  | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | 
|  |  | 
|  | TII = ST.getInstrInfo(); | 
|  | TRI = &TII->getRegisterInfo(); | 
|  | MRI = &MF.getRegInfo(); | 
|  |  | 
|  | LIS = &getAnalysis<LiveIntervals>(); | 
|  | Matrix = &getAnalysis<LiveRegMatrix>(); | 
|  | VRM = &getAnalysis<VirtRegMap>(); | 
|  |  | 
|  | RegClassInfo.runOnMachineFunction(MF); | 
|  |  | 
|  | bool RegsAssigned = false; | 
|  |  | 
|  | // We use a reverse post-order traversal of the control-flow graph to | 
|  | // guarantee that we visit definitions in dominance order. Since WWM | 
|  | // expressions are guaranteed to never involve phi nodes, and we can only | 
|  | // escape WWM through the special WWM instruction, this means that this is a | 
|  | // perfect elimination order, so we can never do any better. | 
|  | ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); | 
|  |  | 
|  | for (MachineBasicBlock *MBB : RPOT) { | 
|  | bool InWWM = false; | 
|  | for (MachineInstr &MI : *MBB) { | 
|  | if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || | 
|  | MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) | 
|  | RegsAssigned |= processDef(MI.getOperand(0)); | 
|  |  | 
|  | if (MI.getOpcode() == AMDGPU::ENTER_WWM) { | 
|  | LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n"); | 
|  | InWWM = true; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | if (MI.getOpcode() == AMDGPU::EXIT_WWM) { | 
|  | LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n"); | 
|  | InWWM = false; | 
|  | } | 
|  |  | 
|  | if (!InWWM) | 
|  | continue; | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "processing " << MI << "\n"); | 
|  |  | 
|  | for (MachineOperand &DefOpnd : MI.defs()) { | 
|  | RegsAssigned |= processDef(DefOpnd); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (!RegsAssigned) | 
|  | return false; | 
|  |  | 
|  | rewriteRegs(MF); | 
|  | return true; | 
|  | } |