llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp - toolchain/llvm-project - Gitiles

 //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// Any MIMG instructions that use tfe or lwe require an initialization of the
 /// result register that will be written in the case of a memory access failure.
 /// The required code is also added to tie this init code to the result of the
 /// img instruction.
 ///
 //===----------------------------------------------------------------------===//
 //

 #include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetMachine.h"

 #define DEBUG_TYPE "si-img-init"

 using namespace llvm;

 namespace {

 class SIAddIMGInit : public MachineFunctionPass {
 public:
   static char ID;

 public:
   SIAddIMGInit() : MachineFunctionPass(ID) {
     initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
   }

   bool runOnMachineFunction(MachineFunction &MF) override;

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 };

 } // End anonymous namespace.

 // Pass-initialization macro for SIAddIMGInit, using DEBUG_TYPE as the
 // argument string and "SI Add IMG Init" as the description.
 // NOTE(review): the two trailing `false` arguments are presumably the
 // "CFG only" and "is analysis" flags -- confirm against INITIALIZE_PASS.
 INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

 // Out-of-class definition of the static ID member declared in SIAddIMGInit.
 char SIAddIMGInit::ID = 0;

 // Reference bound to the pass ID, defined in the llvm namespace so that code
 // outside this file's anonymous namespace can name it.
 char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

 /// Factory for the pass: returns a newly allocated SIAddIMGInit instance.
 /// The caller receives the raw pointer.
 FunctionPass *llvm::createSIAddIMGInitPass() {
   SIAddIMGInit *Pass = new SIAddIMGInit();
   return Pass;
 }

 /// Scan every instruction of \p MF. For each MIMG instruction for which
 /// mayStore() is false and whose tfe or lwe immediate is non-zero, emit code
 /// in front of it that builds a (partially) zero-initialized register of the
 /// vdata operand's register class, add that register to the instruction as
 /// an implicit operand, and tie it to vdata.
 ///
 /// \returns true if at least one instruction was modified.
 bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo *RI = ST.getRegisterInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   bool Changed = false;

   for (MachineBasicBlock &MBB : MF) {
     MachineBasicBlock::iterator Next;
     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();
          I = Next) {
       // Capture the successor first: the code below inserts new instructions
       // in front of the current one.
       Next = std::next(I);
       MachineInstr &MI = *I;

       const auto Opcode = MI.getOpcode();
       if (!TII->isMIMG(Opcode) || MI.mayStore())
         continue;

       MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
       MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
       MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

       // Every instruction reaching this point is expected to carry both
       // fields.
       assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");

       const unsigned TFEVal = TFE->getImm();
       const unsigned LWEVal = LWE->getImm();
       const unsigned D16Val = D16 ? D16->getImm() : 0;

       // Nothing to do unless at least one of tfe / lwe is set.
       if (!TFEVal && !LWEVal)
         continue;

       const DebugLoc &DL = MI.getDebugLoc();
       const int DstIdx =
           AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);

       // The dmask decides how many result dwords are written.
       MachineOperand *MO_Dmask =
           TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
       assert(MO_Dmask && "Expected dmask operand in instruction");
       const unsigned DMaskBits = MO_Dmask->getImm();

       // Gather4 is treated as having four active lanes regardless of dmask.
       unsigned ActiveLanes;
       if (TII->isGather4(Opcode))
         ActiveLanes = 4;
       else
         ActiveLanes = countPopulation(DMaskBits);

       // The dword counts below double as subregister indices, which start
       // at 1 (sub0 == 1).
       static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");

       // With packed D16 two lanes share one dword, so round up to whole
       // dwords; the dword to initialize is the one after the data.
       const bool Packed = !ST.hasUnpackedD16VMem();
       unsigned DataDwords = ActiveLanes;
       if (D16Val && Packed)
         DataDwords = (ActiveLanes + 1) >> 1;
       const unsigned InitIdx = DataDwords + 1;

       // A destination that is too small is an error, but it is diagnosed
       // elsewhere; simply leave such instructions alone here.
       const auto *DstRC = TII->getOpRegClass(MI, DstIdx);
       const uint32_t DstSize = RI->getRegSizeInBits(*DstRC) / 32;
       if (DstSize < InitIdx)
         continue;

       // Register holding the initialization value as it is being built up.
       unsigned PrevDst = MRI.createVirtualRegister(DstRC);
       unsigned NewDst = 0; // Ends up holding the fully initialized value.

       // With PRTStrictNull every dword up to and including InitIdx is
       // zeroed; otherwise only the dword at InitIdx is.
       const bool StrictNull = ST.usePRTStrictNull();
       const unsigned NumInits = StrictNull ? InitIdx : 1;
       unsigned SubIdx = StrictNull ? 1 : InitIdx;

       if (DstSize == 1) {
         // Single-dword destination: zero it directly.
         BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
             .addImm(0);
         NewDst = PrevDst;
       } else {
         BuildMI(MBB, I, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
         for (unsigned Done = 0; Done != NumInits; ++Done, ++SubIdx) {
           NewDst = MRI.createVirtualRegister(DstRC);
           // Materialize a zero dword ...
           const unsigned ZeroReg =
               MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
           BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), ZeroReg)
               .addImm(0);
           // ... and insert it into the wide register at SubIdx.
           BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
               .addReg(PrevDst)
               .addReg(ZeroReg)
               .addImm(SubIdx);

           PrevDst = NewDst;
         }
       }

       // Append the initialized register as an implicit operand and tie it
       // (now the last operand) to the destination.
       MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);
       MI.tieOperands(DstIdx, MI.getNumOperands() - 1);

       Changed = true;
     }
   }

   return Changed;
 }
	//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	/// \file
	/// Any MIMG instructions that use tfe or lwe require an initialization of the
	/// result register that will be written in the case of a memory access failure.
	/// The required code is also added to tie this init code to the result of the
	/// img instruction.
	///
	//===----------------------------------------------------------------------===//
	//

	#include "AMDGPU.h"
	#include "AMDGPUSubtarget.h"
	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
	#include "SIInstrInfo.h"
	#include "llvm/CodeGen/MachineFunctionPass.h"
	#include "llvm/CodeGen/MachineInstrBuilder.h"
	#include "llvm/CodeGen/MachineRegisterInfo.h"
	#include "llvm/IR/Function.h"
	#include "llvm/Support/Debug.h"
	#include "llvm/Target/TargetMachine.h"

	#define DEBUG_TYPE "si-img-init"

	using namespace llvm;

	namespace {

	class SIAddIMGInit : public MachineFunctionPass {
	public:
	static char ID;

	public:
	SIAddIMGInit() : MachineFunctionPass(ID) {
	initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry());
	}

	bool runOnMachineFunction(MachineFunction &MF) override;

	void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.setPreservesCFG();
	MachineFunctionPass::getAnalysisUsage(AU);
	}
	};

	} // End anonymous namespace.

	// Pass-initialization macro for SIAddIMGInit, using DEBUG_TYPE as the
	// argument string and "SI Add IMG Init" as the description.
	// NOTE(review): the two trailing `false` arguments are presumably the
	// "CFG only" and "is analysis" flags -- confirm against INITIALIZE_PASS.
	INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

	// Out-of-class definition of the static ID member declared in SIAddIMGInit.
	char SIAddIMGInit::ID = 0;

	// Reference bound to the pass ID, defined in the llvm namespace so that code
	// outside this file's anonymous namespace can name it.
	char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

	/// Factory for the pass: returns a newly allocated SIAddIMGInit instance.
	/// The caller receives the raw pointer.
	FunctionPass *llvm::createSIAddIMGInitPass() {
	  SIAddIMGInit *Pass = new SIAddIMGInit();
	  return Pass;
	}

	/// Scan every instruction of \p MF. For each MIMG instruction for which
	/// mayStore() is false and whose tfe or lwe immediate is non-zero, emit code
	/// in front of it that builds a (partially) zero-initialized register of the
	/// vdata operand's register class, add that register to the instruction as
	/// an implicit operand, and tie it to vdata.
	///
	/// \returns true if at least one instruction was modified.
	bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) {
	  MachineRegisterInfo &MRI = MF.getRegInfo();
	  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
	  const SIInstrInfo *TII = ST.getInstrInfo();
	  const SIRegisterInfo *RI = ST.getRegisterInfo();
	  bool Changed = false;

	  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
	       ++BI) {
	    MachineBasicBlock &MBB = *BI;
	    MachineBasicBlock::iterator I, Next;
	    for (I = MBB.begin(); I != MBB.end(); I = Next) {
	      // Capture the successor first: new instructions are inserted in front
	      // of the current one below.
	      Next = std::next(I);
	      MachineInstr &MI = *I;

	      auto Opcode = MI.getOpcode();
	      if (TII->isMIMG(Opcode) && !MI.mayStore()) {
	        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
	        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
	        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);

	        // Check for instructions that don't have tfe or lwe fields
	        // There shouldn't be any at this point.
	        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction");

	        unsigned TFEVal = TFE->getImm();
	        unsigned LWEVal = LWE->getImm();
	        unsigned D16Val = D16 ? D16->getImm() : 0;

	        if (TFEVal || LWEVal) {
	          // At least one of TFE or LWE are non-zero
	          // We have to insert a suitable initialization of the result value and
	          // tie this to the dest of the image instruction.

	          const DebugLoc &DL = MI.getDebugLoc();

	          int DstIdx =
	              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);

	          // Calculate which dword we have to initialize to 0.
	          MachineOperand *MO_Dmask =
	              TII->getNamedOperand(MI, AMDGPU::OpName::dmask);

	          // check that dmask operand is found.
	          assert(MO_Dmask && "Expected dmask operand in instruction");

	          unsigned dmask = MO_Dmask->getImm();
	          // Determine the number of active lanes taking into account the
	          // Gather4 special case
	          unsigned ActiveLanes =
	              TII->isGather4(Opcode) ? 4 : countPopulation(dmask);

	          // Subreg indices are counted from 1
	          // When D16 then we want next whole VGPR after write data.
	          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected");

	          bool Packed = !ST.hasUnpackedD16VMem();

	          unsigned InitIdx =
	              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;

	          // Abandon attempt if the dst size isn't large enough
	          // - this is in fact an error but this is picked up elsewhere and
	          // reported correctly.
	          uint32_t DstSize =
	              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
	          if (DstSize < InitIdx)
	            continue;

	          // Create a register for the initialization value.
	          unsigned PrevDst =
	              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
	          unsigned NewDst = 0; // Final initialized value will be in here

	          // If PRTStrictNull feature is enabled (the default) then initialize
	          // all the result registers to 0, otherwise just the error indication
	          // register (VGPRn+1)
	          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1;
	          unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx;

	          if (DstSize == 1) {
	            // In this case we can just initialize the result directly
	            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst)
	                .addImm(0);
	            NewDst = PrevDst;
	          } else {
	            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
	            for (; SizeLeft; SizeLeft--, CurrIdx++) {
	              NewDst =
	                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
	              // Initialize dword
	              unsigned SubReg =
	                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
	              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
	                  .addImm(0);
	              // Insert into the super-reg
	              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
	                  .addReg(PrevDst)
	                  .addReg(SubReg)
	                  .addImm(CurrIdx);

	              PrevDst = NewDst;
	            }
	          }

	          // Add as an implicit operand
	          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit);

	          // Tie the just added implicit operand (now the last operand) to the
	          // dst
	          MI.tieOperands(DstIdx, MI.getNumOperands() - 1);

	          Changed = true;
	        }
	      }
	    }
	  }

	  return Changed;
	}