David Stuttard | f77079f | 2019-01-14 11:55:24 +0000 | [diff] [blame] | 1 | //===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// |
| 2 | // |
Chandler Carruth | 2946cd7 | 2019-01-19 08:50:56 +0000 | [diff] [blame] | 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
David Stuttard | f77079f | 2019-01-14 11:55:24 +0000 | [diff] [blame] | 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
/// Any MIMG instructions that use tfe or lwe require an initialization of the
/// result register that will be written in the case of a memory access
/// failure. The required code is also added to tie this init code to the
/// result of the img instruction.
| 14 | /// |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | // |
| 17 | |
| 18 | #include "AMDGPU.h" |
| 19 | #include "AMDGPUSubtarget.h" |
| 20 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| 21 | #include "SIInstrInfo.h" |
| 22 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 23 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 24 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 25 | #include "llvm/IR/Function.h" |
| 26 | #include "llvm/Support/Debug.h" |
| 27 | #include "llvm/Target/TargetMachine.h" |
| 28 | |
| 29 | #define DEBUG_TYPE "si-img-init" |
| 30 | |
| 31 | using namespace llvm; |
| 32 | |
| 33 | namespace { |
| 34 | |
| 35 | class SIAddIMGInit : public MachineFunctionPass { |
| 36 | public: |
| 37 | static char ID; |
| 38 | |
| 39 | public: |
| 40 | SIAddIMGInit() : MachineFunctionPass(ID) { |
| 41 | initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); |
| 42 | } |
| 43 | |
| 44 | bool runOnMachineFunction(MachineFunction &MF) override; |
| 45 | |
| 46 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 47 | AU.setPreservesCFG(); |
| 48 | MachineFunctionPass::getAnalysisUsage(AU); |
| 49 | } |
| 50 | }; |
| 51 | |
| 52 | } // End anonymous namespace. |
| 53 | |
// Register the pass with the LLVM pass registry under DEBUG_TYPE
// ("si-img-init"); not a CFG-only pass, not an analysis.
INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false)

char SIAddIMGInit::ID = 0;

// Opaque handle the AMDGPU target uses to identify/schedule this pass.
char &llvm::SIAddIMGInitID = SIAddIMGInit::ID;

// Factory called by the target pass pipeline to create an instance.
FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); }
| 61 | |
| 62 | bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { |
| 63 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 64 | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
| 65 | const SIInstrInfo *TII = ST.getInstrInfo(); |
| 66 | const SIRegisterInfo *RI = ST.getRegisterInfo(); |
| 67 | bool Changed = false; |
| 68 | |
| 69 | for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; |
| 70 | ++BI) { |
| 71 | MachineBasicBlock &MBB = *BI; |
| 72 | MachineBasicBlock::iterator I, Next; |
| 73 | for (I = MBB.begin(); I != MBB.end(); I = Next) { |
| 74 | Next = std::next(I); |
| 75 | MachineInstr &MI = *I; |
| 76 | |
| 77 | auto Opcode = MI.getOpcode(); |
| 78 | if (TII->isMIMG(Opcode) && !MI.mayStore()) { |
| 79 | MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); |
| 80 | MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); |
| 81 | MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); |
| 82 | |
| 83 | // Check for instructions that don't have tfe or lwe fields |
| 84 | // There shouldn't be any at this point. |
| 85 | assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); |
| 86 | |
| 87 | unsigned TFEVal = TFE->getImm(); |
| 88 | unsigned LWEVal = LWE->getImm(); |
| 89 | unsigned D16Val = D16 ? D16->getImm() : 0; |
| 90 | |
| 91 | if (TFEVal || LWEVal) { |
| 92 | // At least one of TFE or LWE are non-zero |
| 93 | // We have to insert a suitable initialization of the result value and |
| 94 | // tie this to the dest of the image instruction. |
| 95 | |
| 96 | const DebugLoc &DL = MI.getDebugLoc(); |
| 97 | |
| 98 | int DstIdx = |
| 99 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); |
| 100 | |
| 101 | // Calculate which dword we have to initialize to 0. |
| 102 | MachineOperand *MO_Dmask = |
| 103 | TII->getNamedOperand(MI, AMDGPU::OpName::dmask); |
| 104 | |
| 105 | // check that dmask operand is found. |
| 106 | assert(MO_Dmask && "Expected dmask operand in instruction"); |
| 107 | |
| 108 | unsigned dmask = MO_Dmask->getImm(); |
| 109 | // Determine the number of active lanes taking into account the |
| 110 | // Gather4 special case |
| 111 | unsigned ActiveLanes = |
| 112 | TII->isGather4(Opcode) ? 4 : countPopulation(dmask); |
| 113 | |
| 114 | // Subreg indices are counted from 1 |
| 115 | // When D16 then we want next whole VGPR after write data. |
| 116 | static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); |
| 117 | |
| 118 | bool Packed = !ST.hasUnpackedD16VMem(); |
| 119 | |
| 120 | unsigned InitIdx = |
| 121 | D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; |
| 122 | |
| 123 | // Abandon attempt if the dst size isn't large enough |
| 124 | // - this is in fact an error but this is picked up elsewhere and |
| 125 | // reported correctly. |
| 126 | uint32_t DstSize = |
| 127 | RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; |
| 128 | if (DstSize < InitIdx) |
| 129 | continue; |
| 130 | |
| 131 | // Create a register for the intialization value. |
| 132 | unsigned PrevDst = |
| 133 | MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); |
| 134 | unsigned NewDst = 0; // Final initialized value will be in here |
| 135 | |
| 136 | // If PRTStrictNull feature is enabled (the default) then initialize |
| 137 | // all the result registers to 0, otherwise just the error indication |
| 138 | // register (VGPRn+1) |
| 139 | unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; |
| 140 | unsigned CurrIdx = ST.usePRTStrictNull() ? 1 : InitIdx; |
| 141 | |
| 142 | if (DstSize == 1) { |
| 143 | // In this case we can just initialize the result directly |
| 144 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) |
| 145 | .addImm(0); |
| 146 | NewDst = PrevDst; |
| 147 | } else { |
| 148 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); |
| 149 | for (; SizeLeft; SizeLeft--, CurrIdx++) { |
| 150 | NewDst = |
| 151 | MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); |
| 152 | // Initialize dword |
| 153 | unsigned SubReg = |
| 154 | MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); |
| 155 | BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) |
| 156 | .addImm(0); |
| 157 | // Insert into the super-reg |
| 158 | BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) |
| 159 | .addReg(PrevDst) |
| 160 | .addReg(SubReg) |
| 161 | .addImm(CurrIdx); |
| 162 | |
| 163 | PrevDst = NewDst; |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | // Add as an implicit operand |
| 168 | MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); |
| 169 | |
| 170 | // Tie the just added implicit operand to the dst |
| 171 | MI.tieOperands(DstIdx, MI.getNumOperands() - 1); |
| 172 | |
| 173 | Changed = true; |
| 174 | } |
| 175 | } |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | return Changed; |
| 180 | } |