|  | //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access failure.
/// The required code is also added to tie this init code to the result of the
/// img instruction.
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUSubtarget.h" | 
|  | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" | 
|  | #include "SIInstrInfo.h" | 
|  | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|  | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | #include "llvm/IR/Function.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Target/TargetMachine.h" | 
|  |  | 
|  | #define DEBUG_TYPE "si-img-init" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class SIAddIMGInit : public MachineFunctionPass { | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | public: | 
|  | SIAddIMGInit() : MachineFunctionPass(ID) { | 
|  | initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | bool runOnMachineFunction(MachineFunction &MF) override; | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.setPreservesCFG(); | 
|  | MachineFunctionPass::getAnalysisUsage(AU); | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace. | 
|  |  | 
|  | INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) | 
|  |  | 
|  | char SIAddIMGInit::ID = 0; | 
|  |  | 
|  | char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; | 
|  |  | 
|  | FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } | 
|  |  | 
|  | bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { | 
|  | MachineRegisterInfo &MRI = MF.getRegInfo(); | 
|  | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); | 
|  | const SIInstrInfo *TII = ST.getInstrInfo(); | 
|  | const SIRegisterInfo *RI = ST.getRegisterInfo(); | 
|  | bool Changed = false; | 
|  |  | 
|  | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; | 
|  | ++BI) { | 
|  | MachineBasicBlock &MBB = *BI; | 
|  | MachineBasicBlock::iterator I, Next; | 
|  | for (I = MBB.begin(); I != MBB.end(); I = Next) { | 
|  | Next = std::next(I); | 
|  | MachineInstr &MI = *I; | 
|  |  | 
|  | auto Opcode = MI.getOpcode(); | 
|  | if (TII->isMIMG(Opcode) && !MI.mayStore()) { | 
|  | MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); | 
|  | MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); | 
|  | MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); | 
|  |  | 
|  | // Check for instructions that don't have tfe or lwe fields | 
|  | // There shouldn't be any at this point. | 
|  | assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); | 
|  |  | 
|  | unsigned TFEVal = TFE->getImm(); | 
|  | unsigned LWEVal = LWE->getImm(); | 
|  | unsigned D16Val = D16 ? D16->getImm() : 0; | 
|  |  | 
|  | if (TFEVal || LWEVal) { | 
|  | // At least one of TFE or LWE are non-zero | 
|  | // We have to insert a suitable initialization of the result value and | 
|  | // tie this to the dest of the image instruction. | 
|  |  | 
|  | const DebugLoc &DL = MI.getDebugLoc(); | 
|  |  | 
|  | int DstIdx = | 
|  | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); | 
|  |  | 
|  | // Calculate which dword we have to initialize to 0. | 
|  | MachineOperand *MO_Dmask = | 
|  | TII->getNamedOperand(MI, AMDGPU::OpName::dmask); | 
|  |  | 
|  | // check that dmask operand is found. | 
|  | assert(MO_Dmask && "Expected dmask operand in instruction"); | 
|  |  | 
|  | unsigned dmask = MO_Dmask->getImm(); | 
|  | // Determine the number of active lanes taking into account the | 
|  | // Gather4 special case | 
|  | unsigned ActiveLanes = | 
|  | TII->isGather4(Opcode) ? 4 : countPopulation(dmask); | 
|  |  | 
|  | // Subreg indices are counted from 1 | 
|  | // When D16 then we want next whole VGPR after write data. | 
|  | static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); | 
|  |  | 
|  | bool Packed = !ST.hasUnpackedD16VMem(); | 
|  |  | 
|  | unsigned InitIdx = | 
|  | D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; | 
|  |  | 
|  | // Abandon attempt if the dst size isn't large enough | 
|  | // - this is in fact an error but this is picked up elsewhere and | 
|  | // reported correctly. | 
|  | uint32_t DstSize = | 
|  | RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; | 
|  | if (DstSize < InitIdx) | 
|  | continue; | 
|  |  | 
|  | // Create a register for the intialization value. | 
|  | unsigned PrevDst = | 
|  | MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); | 
|  | unsigned NewDst = 0; // Final initialized value will be in here | 
|  |  | 
|  | // If PRTStrictNull feature is enabled (the default) then initialize | 
|  | // all the result registers to 0, otherwise just the error indication | 
|  | // register (VGPRn+1) | 
|  | unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; | 
|  | unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx; | 
|  |  | 
|  | if (DstSize == 1) { | 
|  | // In this case we can just initialize the result directly | 
|  | BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) | 
|  | .addImm(0); | 
|  | NewDst = PrevDst; | 
|  | } else { | 
|  | BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); | 
|  | for (; SizeLeft; SizeLeft--, CurrIdx++) { | 
|  | NewDst = | 
|  | MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); | 
|  | // Initialize dword | 
|  | unsigned SubReg = | 
|  | MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); | 
|  | BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) | 
|  | .addImm(0); | 
|  | // Insert into the super-reg | 
|  | BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) | 
|  | .addReg(PrevDst) | 
|  | .addReg(SubReg) | 
|  | .addImm(CurrIdx); | 
|  |  | 
|  | PrevDst = NewDst; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Add as an implicit operand | 
|  | MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); | 
|  |  | 
|  | // Tie the just added implicit operand to the dst | 
|  | MI.tieOperands(DstIdx, MI.getNumOperands() - 1); | 
|  |  | 
|  | Changed = true; | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return Changed; | 
|  | } |