|  | //===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// \brief Try to reassign registers on GFX10+ from non-sequential to sequential | 
|  | /// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA | 
|  | /// with sequential versions where possible. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "SIInstrInfo.h" | 
|  | #include "SIMachineFunctionInfo.h" | 
|  | #include "llvm/ADT/Statistic.h" | 
|  | #include "llvm/CodeGen/LiveInterval.h" | 
|  | #include "llvm/CodeGen/LiveIntervals.h" | 
|  | #include "llvm/CodeGen/LiveRegMatrix.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/VirtRegMap.h" | 
|  | #include "llvm/Support/MathExtras.h" | 
|  | #include <algorithm> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "amdgpu-nsa-reassign" | 
|  |  | 
|  | STATISTIC(NumNSAInstructions, | 
|  | "Number of NSA instructions with non-sequential address found"); | 
|  | STATISTIC(NumNSAConverted, | 
|  | "Number of NSA instructions changed to sequential"); | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class GCNNSAReassign : public MachineFunctionPass { | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | GCNNSAReassign() : MachineFunctionPass(ID) { | 
|  | initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | StringRef getPassName() const override { return "GCN NSA Reassign"; } | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.addRequired<LiveIntervals>(); | 
|  | AU.addRequired<VirtRegMap>(); | 
|  | AU.addRequired<LiveRegMatrix>(); | 
|  | AU.setPreservesAll(); | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | } | 
|  |  | 
|  | private: | 
|  | typedef enum { | 
|  | NOT_NSA,        // Not an NSA instruction | 
|  | FIXED,          // NSA which we cannot modify | 
|  | NON_CONTIGUOUS, // NSA with non-sequential address which we can try | 
|  | // to optimize. | 
|  | CONTIGUOUS      // NSA with all sequential address registers | 
|  | } NSA_Status; | 
|  |  | 
|  | const GCNSubtarget *ST; | 
|  |  | 
|  | const MachineRegisterInfo *MRI; | 
|  |  | 
|  | const SIRegisterInfo *TRI; | 
|  |  | 
|  | VirtRegMap *VRM; | 
|  |  | 
|  | LiveRegMatrix *LRM; | 
|  |  | 
|  | LiveIntervals *LIS; | 
|  |  | 
|  | unsigned MaxNumVGPRs; | 
|  |  | 
|  | const MCPhysReg *CSRegs; | 
|  |  | 
|  | NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const; | 
|  |  | 
|  | bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals, | 
|  | unsigned StartReg) const; | 
|  |  | 
|  | bool canAssign(unsigned StartReg, unsigned NumRegs) const; | 
|  |  | 
|  | bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const; | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace. | 
|  |  | 
|  | INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign", | 
|  | false, false) | 
|  | INITIALIZE_PASS_DEPENDENCY(LiveIntervals) | 
|  | INITIALIZE_PASS_DEPENDENCY(VirtRegMap) | 
|  | INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) | 
|  | INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign", | 
|  | false, false) | 
|  |  | 
|  |  | 
|  | char GCNNSAReassign::ID = 0; | 
|  |  | 
|  | char &llvm::GCNNSAReassignID = GCNNSAReassign::ID; | 
|  |  | 
|  | bool | 
|  | GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals, | 
|  | unsigned StartReg) const { | 
|  | unsigned NumRegs = Intervals.size(); | 
|  |  | 
|  | for (unsigned N = 0; N < NumRegs; ++N) | 
|  | if (VRM->hasPhys(Intervals[N]->reg)) | 
|  | LRM->unassign(*Intervals[N]); | 
|  |  | 
|  | for (unsigned N = 0; N < NumRegs; ++N) | 
|  | if (LRM->checkInterference(*Intervals[N], StartReg + N)) | 
|  | return false; | 
|  |  | 
|  | for (unsigned N = 0; N < NumRegs; ++N) | 
|  | LRM->assign(*Intervals[N], StartReg + N); | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const { | 
|  | for (unsigned N = 0; N < NumRegs; ++N) { | 
|  | unsigned Reg = StartReg + N; | 
|  | if (!MRI->isAllocatable(Reg)) | 
|  | return false; | 
|  |  | 
|  | for (unsigned I = 0; CSRegs[I]; ++I) | 
|  | if (TRI->isSubRegisterEq(Reg, CSRegs[I]) && | 
|  | !LRM->isPhysRegUsed(CSRegs[I])) | 
|  | return false; | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | bool | 
|  | GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const { | 
|  | unsigned NumRegs = Intervals.size(); | 
|  |  | 
|  | if (NumRegs > MaxNumVGPRs) | 
|  | return false; | 
|  | unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0; | 
|  |  | 
|  | for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) { | 
|  | if (!canAssign(Reg, NumRegs)) | 
|  | continue; | 
|  |  | 
|  | if (tryAssignRegisters(Intervals, Reg)) | 
|  | return true; | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  | GCNNSAReassign::NSA_Status | 
|  | GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { | 
|  | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); | 
|  | if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA) | 
|  | return NSA_Status::NOT_NSA; | 
|  |  | 
|  | int VAddr0Idx = | 
|  | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); | 
|  |  | 
|  | unsigned VgprBase = 0; | 
|  | bool NSA = false; | 
|  | for (unsigned I = 0; I < Info->VAddrDwords; ++I) { | 
|  | const MachineOperand &Op = MI.getOperand(VAddr0Idx + I); | 
|  | Register Reg = Op.getReg(); | 
|  | if (Register::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) | 
|  | return NSA_Status::FIXED; | 
|  |  | 
|  | Register PhysReg = VRM->getPhys(Reg); | 
|  |  | 
|  | if (!Fast) { | 
|  | if (!PhysReg) | 
|  | return NSA_Status::FIXED; | 
|  |  | 
|  | // Bail if address is not a VGPR32. That should be possible to extend the | 
|  | // optimization to work with subregs of a wider register tuples, but the | 
|  | // logic to find free registers will be much more complicated with much | 
|  | // less chances for success. That seems reasonable to assume that in most | 
|  | // cases a tuple is used because a vector variable contains different | 
|  | // parts of an address and it is either already consequitive or cannot | 
|  | // be reassigned if not. If needed it is better to rely on register | 
|  | // coalescer to process such address tuples. | 
|  | if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg()) | 
|  | return NSA_Status::FIXED; | 
|  |  | 
|  | const MachineInstr *Def = MRI->getUniqueVRegDef(Reg); | 
|  |  | 
|  | if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg) | 
|  | return NSA_Status::FIXED; | 
|  |  | 
|  | for (auto U : MRI->use_nodbg_operands(Reg)) { | 
|  | if (U.isImplicit()) | 
|  | return NSA_Status::FIXED; | 
|  | const MachineInstr *UseInst = U.getParent(); | 
|  | if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg) | 
|  | return NSA_Status::FIXED; | 
|  | } | 
|  |  | 
|  | if (!LIS->hasInterval(Reg)) | 
|  | return NSA_Status::FIXED; | 
|  | } | 
|  |  | 
|  | if (I == 0) | 
|  | VgprBase = PhysReg; | 
|  | else if (VgprBase + I != PhysReg) | 
|  | NSA = true; | 
|  | } | 
|  |  | 
|  | return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS; | 
|  | } | 
|  |  | 
|  | bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { | 
|  | ST = &MF.getSubtarget<GCNSubtarget>(); | 
|  | if (ST->getGeneration() < GCNSubtarget::GFX10) | 
|  | return false; | 
|  |  | 
|  | MRI = &MF.getRegInfo(); | 
|  | TRI = ST->getRegisterInfo(); | 
|  | VRM = &getAnalysis<VirtRegMap>(); | 
|  | LRM = &getAnalysis<LiveRegMatrix>(); | 
|  | LIS = &getAnalysis<LiveIntervals>(); | 
|  |  | 
|  | const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); | 
|  | MaxNumVGPRs = ST->getMaxNumVGPRs(MF); | 
|  | MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs); | 
|  | CSRegs = MRI->getCalleeSavedRegs(); | 
|  |  | 
|  | using Candidate = std::pair<const MachineInstr*, bool>; | 
|  | SmallVector<Candidate, 32> Candidates; | 
|  | for (const MachineBasicBlock &MBB : MF) { | 
|  | for (const MachineInstr &MI : MBB) { | 
|  | switch (CheckNSA(MI)) { | 
|  | default: | 
|  | continue; | 
|  | case NSA_Status::CONTIGUOUS: | 
|  | Candidates.push_back(std::make_pair(&MI, true)); | 
|  | break; | 
|  | case NSA_Status::NON_CONTIGUOUS: | 
|  | Candidates.push_back(std::make_pair(&MI, false)); | 
|  | ++NumNSAInstructions; | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bool Changed = false; | 
|  | for (auto &C : Candidates) { | 
|  | if (C.second) | 
|  | continue; | 
|  |  | 
|  | const MachineInstr *MI = C.first; | 
|  | if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) { | 
|  | // Already happen to be fixed. | 
|  | C.second = true; | 
|  | ++NumNSAConverted; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode()); | 
|  | int VAddr0Idx = | 
|  | AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0); | 
|  |  | 
|  | SmallVector<LiveInterval *, 16> Intervals; | 
|  | SmallVector<unsigned, 16> OrigRegs; | 
|  | SlotIndex MinInd, MaxInd; | 
|  | for (unsigned I = 0; I < Info->VAddrDwords; ++I) { | 
|  | const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); | 
|  | Register Reg = Op.getReg(); | 
|  | LiveInterval *LI = &LIS->getInterval(Reg); | 
|  | if (llvm::find(Intervals, LI) != Intervals.end()) { | 
|  | // Same register used, unable to make sequential | 
|  | Intervals.clear(); | 
|  | break; | 
|  | } | 
|  | Intervals.push_back(LI); | 
|  | OrigRegs.push_back(VRM->getPhys(Reg)); | 
|  | MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex(); | 
|  | MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex(); | 
|  | } | 
|  |  | 
|  | if (Intervals.empty()) | 
|  | continue; | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI | 
|  | << "\tOriginal allocation:\t"; | 
|  | for(auto *LI : Intervals) | 
|  | dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI); | 
|  | dbgs() << '\n'); | 
|  |  | 
|  | bool Success = scavengeRegs(Intervals); | 
|  | if (!Success) { | 
|  | LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n"); | 
|  | if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation. | 
|  | continue; | 
|  | } else { | 
|  | // Check we did not make it worse for other instructions. | 
|  | auto I = std::lower_bound(Candidates.begin(), &C, MinInd, | 
|  | [this](const Candidate &C, SlotIndex I) { | 
|  | return LIS->getInstructionIndex(*C.first) < I; | 
|  | }); | 
|  | for (auto E = Candidates.end(); Success && I != E && | 
|  | LIS->getInstructionIndex(*I->first) < MaxInd; ++I) { | 
|  | if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) { | 
|  | Success = false; | 
|  | LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (!Success) { | 
|  | for (unsigned I = 0; I < Info->VAddrDwords; ++I) | 
|  | if (VRM->hasPhys(Intervals[I]->reg)) | 
|  | LRM->unassign(*Intervals[I]); | 
|  |  | 
|  | for (unsigned I = 0; I < Info->VAddrDwords; ++I) | 
|  | LRM->assign(*Intervals[I], OrigRegs[I]); | 
|  |  | 
|  | continue; | 
|  | } | 
|  |  | 
|  | C.second = true; | 
|  | ++NumNSAConverted; | 
|  | LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t [" | 
|  | << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI) | 
|  | << " : " | 
|  | << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI) | 
|  | << "]\n"); | 
|  | Changed = true; | 
|  | } | 
|  |  | 
|  | return Changed; | 
|  | } |