Blame - llvm/lib/Target/AMDGPU/AMDGPUInline.cpp - toolchain/llvm-project

blob: 0ad78e036a18a078c4970b4baf312dba3cd6387d [file] [log] [blame]

Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	1	//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
				2	//
Chandler Carruth	2946cd7	2019-01-19 08:50:56 +0000	[diff] [blame]	3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	/// \file
Adrian Prantl	5f8f34e4	2018-05-01 15:54:18 +0000	[diff] [blame]	10	/// This is the AMDGPU-specific replacement for the standard inliner.
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	11	/// The main purpose is to account for the fact that calls are not only expensive
				12	/// on the AMDGPU, but much more expensive if a private memory pointer is
				13	/// passed to a function as an argument. In this situation, we are unable to
				14	/// eliminate private memory in the caller unless inlined and end up with slow
				15	/// and expensive scratch access. Thus, we boost the inline threshold for such
				16	/// functions here.
				17	///
				18	//===----------------------------------------------------------------------===//
				19
				20
				21	#include "AMDGPU.h"
				22	#include "llvm/Transforms/IPO.h"
				23	#include "llvm/Analysis/AssumptionCache.h"
				24	#include "llvm/Analysis/CallGraph.h"
				25	#include "llvm/Analysis/InlineCost.h"
				26	#include "llvm/Analysis/ValueTracking.h"
				27	#include "llvm/Analysis/TargetTransformInfo.h"
				28	#include "llvm/IR/CallSite.h"
				29	#include "llvm/IR/DataLayout.h"
				30	#include "llvm/IR/Instructions.h"
				31	#include "llvm/IR/Module.h"
				32	#include "llvm/IR/Type.h"
				33	#include "llvm/Support/CommandLine.h"
				34	#include "llvm/Support/Debug.h"
				35	#include "llvm/Transforms/IPO/Inliner.h"
				36
				37	using namespace llvm;
				38
				39	#define DEBUG_TYPE "inline"
				40
				41	static cl::opt<int>
				42	ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
				43	cl::desc("Cost of alloca argument"));
				44
				45	// If the amount of scratch memory to eliminate exceeds our ability to allocate
Sanjay Patel	de58e93	2018-11-07 14:35:36 +0000	[diff] [blame]	46	// it into registers we gain nothing by aggressively inlining functions for that
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	47	// heuristic.
				48	static cl::opt<unsigned>
				49	ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
				50	cl::desc("Maximum alloca size to use for inline cost"));
				51
				52	namespace {
				53
				54	class AMDGPUInliner : public LegacyInlinerBase {
				55
				56	public:
				57	AMDGPUInliner() : LegacyInlinerBase(ID) {
				58	initializeAMDGPUInlinerPass(*PassRegistry::getPassRegistry());
				59	Params = getInlineParams();
				60	}
				61
				62	static char ID; // Pass identification, replacement for typeid
				63
				64	unsigned getInlineThreshold(CallSite CS) const;
				65
				66	InlineCost getInlineCost(CallSite CS) override;
				67
				68	bool runOnSCC(CallGraphSCC &SCC) override;
				69
				70	void getAnalysisUsage(AnalysisUsage &AU) const override;
				71
				72	private:
				73	TargetTransformInfoWrapperPass *TTIWP;
				74
				75	InlineParams Params;
				76	};
				77
				78	} // end anonymous namespace
				79
				80	char AMDGPUInliner::ID = 0;
				81	INITIALIZE_PASS_BEGIN(AMDGPUInliner, "amdgpu-inline",
				82	"AMDGPU Function Integration/Inlining", false, false)
				83	INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
				84	INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
				85	INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
				86	INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
				87	INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
				88	INITIALIZE_PASS_END(AMDGPUInliner, "amdgpu-inline",
				89	"AMDGPU Function Integration/Inlining", false, false)
				90
				91	Pass *llvm::createAMDGPUFunctionInliningPass() { return new AMDGPUInliner(); }
				92
				93	bool AMDGPUInliner::runOnSCC(CallGraphSCC &SCC) {
				94	TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
				95	return LegacyInlinerBase::runOnSCC(SCC);
				96	}
				97
				98	void AMDGPUInliner::getAnalysisUsage(AnalysisUsage &AU) const {
				99	AU.addRequired<TargetTransformInfoWrapperPass>();
				100	LegacyInlinerBase::getAnalysisUsage(AU);
				101	}
				102
				103	unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
				104	int Thres = Params.DefaultThreshold;
				105
				106	Function *Caller = CS.getCaller();
				107	// Listen to the inlinehint attribute when it would increase the threshold
				108	// and the caller does not need to minimize its size.
				109	Function *Callee = CS.getCalledFunction();
				110	bool InlineHint = Callee && !Callee->isDeclaration() &&
				111	Callee->hasFnAttribute(Attribute::InlineHint);
				112	if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
				113	&& !Caller->hasFnAttribute(Attribute::MinSize))
				114	Thres = Params.HintThreshold.getValue();
				115
				116	const DataLayout &DL = Caller->getParent()->getDataLayout();
				117	if (!Callee)
				118	return (unsigned)Thres;
				119
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	120	// If we have a pointer to private array passed into a function
				121	// it will not be optimized out, leaving scratch usage.
				122	// Increase the inline threshold to allow inliniting in this case.
				123	uint64_t AllocaSize = 0;
				124	SmallPtrSet<const AllocaInst *, 8> AIVisited;
				125	for (Value *PtrArg : CS.args()) {
				126	Type *Ty = PtrArg->getType();
				127	if (!Ty->isPointerTy() \|\|
Matt Arsenault	0da6350	2018-08-31 05:49:54 +0000	[diff] [blame]	128	Ty->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	129	continue;
				130	PtrArg = GetUnderlyingObject(PtrArg, DL);
				131	if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
				132	if (!AI->isStaticAlloca() \|\| !AIVisited.insert(AI).second)
				133	continue;
				134	AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
				135	// If the amount of stack memory is excessive we will not be able
				136	// to get rid of the scratch anyway, bail out.
				137	if (AllocaSize > ArgAllocaCutoff) {
				138	AllocaSize = 0;
				139	break;
				140	}
				141	}
				142	}
				143	if (AllocaSize)
				144	Thres += ArgAllocaCost;
				145
				146	return (unsigned)Thres;
				147	}
				148
				149	// Check if call is just a wrapper around another call.
				150	// In this case we only have call and ret instructions.
				151	static bool isWrapperOnlyCall(CallSite CS) {
				152	Function *Callee = CS.getCalledFunction();
				153	if (!Callee \|\| Callee->size() != 1)
				154	return false;
				155	const BasicBlock &BB = Callee->getEntryBlock();
				156	if (const Instruction *I = BB.getFirstNonPHI()) {
				157	if (!isa<CallInst>(I)) {
				158	return false;
				159	}
				160	if (isa<ReturnInst>(*std::next(I->getIterator()))) {
Nicola Zaghen	d34e60c	2018-05-14 12:53:11 +0000	[diff] [blame]	161	LLVM_DEBUG(dbgs() << " Wrapper only call detected: "
				162	<< Callee->getName() << '\n');
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	163	return true;
				164	}
				165	}
				166	return false;
				167	}
				168
				169	InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
				170	Function *Callee = CS.getCalledFunction();
				171	Function *Caller = CS.getCaller();
				172	TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
				173
David Bolvansky	c0aa4b7	2018-08-05 14:53:08 +0000	[diff] [blame]	174	if (!Callee \|\| Callee->isDeclaration())
				175	return llvm::InlineCost::getNever("undefined callee");
				176
				177	if (CS.isNoInline())
				178	return llvm::InlineCost::getNever("noinline");
				179
				180	if (!TTI.areInlineCompatible(Caller, Callee))
				181	return llvm::InlineCost::getNever("incompatible");
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	182
				183	if (CS.hasFnAttr(Attribute::AlwaysInline)) {
Yevgeny Rouban	15b17d0	2019-02-01 10:44:43 +0000	[diff] [blame^]	184	auto IsViable = isInlineViable(*Callee);
				185	if (IsViable)
David Bolvansky	c0aa4b7	2018-08-05 14:53:08 +0000	[diff] [blame]	186	return llvm::InlineCost::getAlways("alwaysinline viable");
Yevgeny Rouban	15b17d0	2019-02-01 10:44:43 +0000	[diff] [blame^]	187	return llvm::InlineCost::getNever(IsViable.message);
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	188	}
				189
				190	if (isWrapperOnlyCall(CS))
David Bolvansky	c0aa4b7	2018-08-05 14:53:08 +0000	[diff] [blame]	191	return llvm::InlineCost::getAlways("wrapper-only call");
Stanislav Mekhanoshin	5670e6d	2017-09-20 04:25:58 +0000	[diff] [blame]	192
				193	InlineParams LocalParams = Params;
				194	LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);
				195	bool RemarksEnabled = false;
				196	const auto &BBs = Caller->getBasicBlockList();
				197	if (!BBs.empty()) {
				198	auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
				199	if (DI.isEnabled())
				200	RemarksEnabled = true;
				201	}
				202
				203	OptimizationRemarkEmitter ORE(Caller);
				204	std::function<AssumptionCache &(Function &)> GetAssumptionCache =
				205	[this](Function &F) -> AssumptionCache & {
				206	return ACT->getAssumptionCache(F);
				207	};
				208
				209	return llvm::getInlineCost(CS, Callee, LocalParams, TTI, GetAssumptionCache,
				210	None, PSI, RemarksEnabled ? &ORE : nullptr);
				211	}