Blame - llvm/lib/Target/AMDGPU/AMDGPUInline.cpp - toolchain/llvm-project

blob: 945c9acd379a5f3ba39d1803441f5c50504dfcb8 [file] [log] [blame]

Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	1	//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	/// \file
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	11	/// This is an AMDGPU-specific replacement of the standard inliner.
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	12	/// The main purpose is to account for the fact that calls are not only expensive
				13	/// on the AMDGPU, but much more expensive if a private memory pointer is
				14	/// passed to a function as an argument. In this situation, we are unable to
				15	/// eliminate private memory in the caller unless inlined and end up with slow
				16	/// and expensive scratch access. Thus, we boost the inline threshold for such
				17	/// functions here.
				18	///
				19	//===----------------------------------------------------------------------===//
				20
				21
				22	#include "AMDGPU.h"
				23	#include "llvm/Transforms/IPO.h"
				24	#include "llvm/Analysis/AssumptionCache.h"
				25	#include "llvm/Analysis/CallGraph.h"
				26	#include "llvm/Analysis/InlineCost.h"
				27	#include "llvm/Analysis/ValueTracking.h"
				28	#include "llvm/Analysis/TargetTransformInfo.h"
				29	#include "llvm/IR/CallSite.h"
				30	#include "llvm/IR/DataLayout.h"
				31	#include "llvm/IR/Instructions.h"
				32	#include "llvm/IR/Module.h"
				33	#include "llvm/IR/Type.h"
				34	#include "llvm/Support/CommandLine.h"
				35	#include "llvm/Support/Debug.h"
				36	#include "llvm/Transforms/IPO/Inliner.h"
				37
				38	using namespace llvm;
				39
				40	#define DEBUG_TYPE "inline"
				41
				42	static cl::opt<int>
				43	ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
				44	cl::desc("Cost of alloca argument"));
				45
				46	// If the amount of scratch memory to eliminate exceeds our ability to allocate
Sanjay Patel	de58e93	2018-11-07 14:35:36 +0000	[diff] [blame]	47	// it into registers we gain nothing by aggressively inlining functions for that
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	48	// heuristic.
				49	static cl::opt<unsigned>
				50	ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
				51	cl::desc("Maximum alloca size to use for inline cost"));
				52
				53	namespace {
				54
				55	class AMDGPUInliner : public LegacyInlinerBase {
				56
				57	public:
				58	AMDGPUInliner() : LegacyInlinerBase(ID) {
				59	initializeAMDGPUInlinerPass(*PassRegistry::getPassRegistry());
				60	Params = getInlineParams();
				61	}
				62
				63	static char ID; // Pass identification, replacement for typeid
				64
				65	unsigned getInlineThreshold(CallSite CS) const;
				66
				67	InlineCost getInlineCost(CallSite CS) override;
				68
				69	bool runOnSCC(CallGraphSCC &SCC) override;
				70
				71	void getAnalysisUsage(AnalysisUsage &AU) const override;
				72
				73	private:
				74	TargetTransformInfoWrapperPass *TTIWP;
				75
				76	InlineParams Params;
				77	};
				78
				79	} // end anonymous namespace
				80
				81	char AMDGPUInliner::ID = 0;
				82	INITIALIZE_PASS_BEGIN(AMDGPUInliner, "amdgpu-inline",
				83	"AMDGPU Function Integration/Inlining", false, false)
				84	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
				85	INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
				86	INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
				87	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
				88	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
				89	INITIALIZE_PASS_END(AMDGPUInliner, "amdgpu-inline",
				90	"AMDGPU Function Integration/Inlining", false, false)
				91
				92	Pass *llvm::createAMDGPUFunctionInliningPass() { return new AMDGPUInliner(); }
				93
				94	bool AMDGPUInliner::runOnSCC(CallGraphSCC &SCC) {
				95	TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
				96	return LegacyInlinerBase::runOnSCC(SCC);
				97	}
				98
				99	void AMDGPUInliner::getAnalysisUsage(AnalysisUsage &AU) const {
				100	AU.addRequired<TargetTransformInfoWrapperPass>();
				101	LegacyInlinerBase::getAnalysisUsage(AU);
				102	}
				103
				104	unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
				105	int Thres = Params.DefaultThreshold;
				106
				107	Function *Caller = CS.getCaller();
				108	// Listen to the inlinehint attribute when it would increase the threshold
				109	// and the caller does not need to minimize its size.
				110	Function *Callee = CS.getCalledFunction();
				111	bool InlineHint = Callee && !Callee->isDeclaration() &&
				112	Callee->hasFnAttribute(Attribute::InlineHint);
				113	if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
				114	&& !Caller->hasFnAttribute(Attribute::MinSize))
				115	Thres = Params.HintThreshold.getValue();
				116
				117	const DataLayout &DL = Caller->getParent()->getDataLayout();
				118	if (!Callee)
				119	return (unsigned)Thres;
				120
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	121	// If we have a pointer to private array passed into a function
				122	// it will not be optimized out, leaving scratch usage.
				123	// Increase the inline threshold to allow inliniting in this case.
				124	uint64_t AllocaSize = 0;
				125	SmallPtrSet<const AllocaInst *, 8> AIVisited;
				126	for (Value *PtrArg : CS.args()) {
				127	Type *Ty = PtrArg->getType();
				128	if (!Ty->isPointerTy() \|\|
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	129	Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	130	continue;
				131	PtrArg = GetUnderlyingObject(PtrArg, DL);
				132	if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
				133	if (!AI->isStaticAlloca() \|\| !AIVisited.insert(AI).second)
				134	continue;
				135	AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
				136	// If the amount of stack memory is excessive we will not be able
				137	// to get rid of the scratch anyway, bail out.
				138	if (AllocaSize > ArgAllocaCutoff) {
				139	AllocaSize = 0;
				140	break;
				141	}
				142	}
				143	}
				144	if (AllocaSize)
				145	Thres += ArgAllocaCost;
				146
				147	return (unsigned)Thres;
				148	}
				149
				150	// Check if call is just a wrapper around another call.
				151	// In this case we only have call and ret instructions.
				152	static bool isWrapperOnlyCall(CallSite CS) {
				153	Function *Callee = CS.getCalledFunction();
				154	if (!Callee \|\| Callee->size() != 1)
				155	return false;
				156	const BasicBlock &BB = Callee->getEntryBlock();
				157	if (const Instruction *I = BB.getFirstNonPHI()) {
				158	if (!isa<CallInst>(I)) {
				159	return false;
				160	}
				161	if (isa<ReturnInst>(*std::next(I->getIterator()))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	162	LLVM_DEBUG(dbgs() << " Wrapper only call detected: "
				163	<< Callee->getName() << '\n');
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	164	return true;
				165	}
				166	}
				167	return false;
				168	}
				169
				170	InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
				171	Function *Callee = CS.getCalledFunction();
				172	Function *Caller = CS.getCaller();
				173	TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
				174
David Bolvansky	c0aa4b7	2018-08-05 14:53:08 +0000	[diff] [blame]	175	if (!Callee \|\| Callee->isDeclaration())
				176	return llvm::InlineCost::getNever("undefined callee");
				177
				178	if (CS.isNoInline())
				179	return llvm::InlineCost::getNever("noinline");
				180
				181	if (!TTI.areInlineCompatible(Caller, Callee))
				182	return llvm::InlineCost::getNever("incompatible");
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	183
				184	if (CS.hasFnAttr(Attribute::AlwaysInline)) {
				185	if (isInlineViable(*Callee))
David Bolvansky	c0aa4b7	2018-08-05 14:53:08 +0000	[diff] [blame]	186	return llvm::InlineCost::getAlways("alwaysinline viable");
				187	return llvm::InlineCost::getNever("alwaysinline unviable");
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	188	}
				189
				190	if (isWrapperOnlyCall(CS))
David Bolvansky	c0aa4b7	2018-08-05 14:53:08 +0000	[diff] [blame]	191	return llvm::InlineCost::getAlways("wrapper-only call");
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	192
				193	InlineParams LocalParams = Params;
				194	LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);
				195	bool RemarksEnabled = false;
				196	const auto &BBs = Caller->getBasicBlockList();
				197	if (!BBs.empty()) {
				198	auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
				199	if (DI.isEnabled())
				200	RemarksEnabled = true;
				201	}
				202
				203	OptimizationRemarkEmitter ORE(Caller);
				204	std::function<AssumptionCache &(Function &)> GetAssumptionCache =
				205	[this](Function &F) -> AssumptionCache & {
				206	return ACT->getAssumptionCache(F);
				207	};
				208
				209	return llvm::getInlineCost(CS, Callee, LocalParams, TTI, GetAssumptionCache,
				210	None, PSI, RemarksEnabled ? &ORE : nullptr);
				211	}