//===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// The SIFixupVectorISel pass cleans up post-ISel vector issues.
/// Currently it converts GLOBAL_{LOAD|STORE}_* and GLOBAL_ATOMIC_*
/// instructions into their _SADDR variants, feeding the sreg into the
/// saddr field of the new instruction. It currently handles a
/// REG_SEQUENCE feeding the vaddr and decomposes it into a base and
/// an index.
///
/// Transform:
/// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32
/// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32,
///                                    %24:vgpr_32, %19:sreg_64_xexec
/// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1
/// %11:vreg_64 = COPY %16:vreg_64
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0
/// Into:
/// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0
/// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1
/// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec, 16...
///
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fixup-vector-isel"

using namespace llvm;

STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");

namespace {

class SIFixupVectorISel : public MachineFunctionPass {
public:
  static char ID;

  SIFixupVectorISel() : MachineFunctionPass(ID) {
    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
                "SI Fixup Vector ISel", false, false)

char SIFixupVectorISel::ID = 0;

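// Expose the pass ID so the target pass pipeline can add this pass by ID.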
char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;

FunctionPass *llvm::createSIFixupVectorISelPass() {
  return new SIFixupVectorISel();
}

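// Walk backward from a global memop's vaddr operand, looking through COPY
// and REG_SEQUENCE, for a V_ADD_I32_e64 whose operands supply a 64-bit SGPR
// base and a 64-bit VGPR index. On success, returns true with BaseReg and
// IndexReg set; otherwise returns false.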
static bool findSRegBaseAndIndex(MachineOperand *Op,
                                 unsigned &BaseReg,
                                 unsigned &IndexReg,
                                 MachineRegisterInfo &MRI,
                                 const SIRegisterInfo *TRI) {
  SmallVector<MachineOperand *, 8> Worklist;
  Worklist.push_back(Op);
  while (!Worklist.empty()) {
    MachineOperand *WOp = Worklist.pop_back_val();
    if (!WOp->isReg() ||
        !TargetRegisterInfo::isVirtualRegister(WOp->getReg()))
      continue;
    MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg());
    switch (DefInst->getOpcode()) {
    default:
      continue;
    case AMDGPU::COPY:
      Worklist.push_back(&DefInst->getOperand(1));
      break;
    case AMDGPU::REG_SEQUENCE:
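      // Only handle the simple two-element sequence
      // (dst, reg0, sub0, reg1, sub1) that builds a 64-bit pair.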
      if (DefInst->getNumOperands() != 5)
        continue;
      Worklist.push_back(&DefInst->getOperand(1));
      Worklist.push_back(&DefInst->getOperand(3));
      break;
    case AMDGPU::V_ADD_I32_e64:
      // The V_ADD_* and its analogous V_ADDC_* are generated by
      // a previous pass which lowered from an ADD_64_PSEUDO,
      // which generates subregs to break up the 64 bit args.
      if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      BaseReg = DefInst->getOperand(2).getReg();
      if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister)
        continue;
      IndexReg = DefInst->getOperand(3).getReg();
      // Chase the IndexReg.
      MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64 bit for Index.
      // If the Index register is a subreg, we want it to reference
      // a 64 bit register which we will use as the Index reg.
      const TargetRegisterClass *IdxRC, *BaseRC;
      IdxRC = MRI.getRegClass(MI->getOperand(1).getReg());
      if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64)
        continue;
      IndexReg = MI->getOperand(1).getReg();
      // Chase the BaseReg.
      MI = MRI.getUniqueVRegDef(BaseReg);
      if (!MI || !MI->isCopy())
        continue;
      // Make sure the register class is 64 bit for Base.
      BaseReg = MI->getOperand(1).getReg();
      BaseRC = MRI.getRegClass(BaseReg);
      if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64)
        continue;
      // Make sure Base is an SReg and Index is a VReg.
      if (!TRI->isSGPRReg(MRI, BaseReg))
        return false;
      if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg)))
        return false;
      // Clear any kill flags on the Index and Base regs; they are used later.
      MRI.clearKillFlags(IndexReg);
      MRI.clearKillFlags(BaseReg);
      return true;
    }
  }
  return false;
}

// Identify global LOAD|STORE/ATOMIC instructions and try to convert them to
// their _SADDR form.
static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
                             MachineFunction &MF,
                             MachineRegisterInfo &MRI,
                             const GCNSubtarget &ST,
                             const SIInstrInfo *TII,
                             const SIRegisterInfo *TRI) {
  bool FuncModified = false;
  MachineBasicBlock::iterator I, Next;
  for (I = MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MI = *I;
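    // getGlobalSaddrOp maps a global memory opcode to its _SADDR variant,
    // or returns -1 when no such variant exists.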
    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
    if (NewOpcd < 0)
      continue;
    // Update our statistics on opportunities seen.
    ++NumSGPRGlobalOccurs;
    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
    // We need a Base and an Index, or we can't transform to _SADDR.
    unsigned BaseReg = 0;
    unsigned IndexReg = 0;
    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
      continue;
    ++NumSGPRGlobalSaddrs;
    FuncModified = true;
    // Create the new _SADDR memory instruction.
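    // _SADDR operand order: [vdst,] vaddr (the VGPR index), [vdata,]
    // saddr (the SGPR base), offset, [glc,] slc[, vdst_in].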
    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
    MachineInstr *NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
    if (HasVdst)
      NewGlob->addOperand(MF, MI.getOperand(0));
    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
    if (VData)
      NewGlob->addOperand(MF, *VData);
    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));

    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
    // Atomics don't have a GLC operand, so omit the field if it isn't there.
    if (Glc)
      NewGlob->addOperand(MF, *Glc);
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
    // _D16 variants have a vdst_in operand; copy it over.
    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
                                   AMDGPU::OpName::vdst_in);
    if (VDstInOp)
      NewGlob->addOperand(MF, *VDstInOp);
    NewGlob->copyImplicitOps(MF, MI);
    NewGlob->cloneMemRefs(MF, MI);
    // Remove the old Global Memop instruction.
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
  }
  return FuncModified;
}

bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  bool FuncModified = false;
  for (MachineBasicBlock &MBB : MF) {
    // Clean up missed SADDR opportunities from ISel.
    FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI);
  }
  return FuncModified;
}