//===- AMDGPUInline.cpp - Code to perform simple function inlining --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
9/// \file
Adrian Prantl5f8f34e42018-05-01 15:54:18 +000010/// This is AMDGPU specific replacement of the standard inliner.
Stanislav Mekhanoshin5670e6d2017-09-20 04:25:58 +000011/// The main purpose is to account for the fact that calls not only expensive
12/// on the AMDGPU, but much more expensive if a private memory pointer is
13/// passed to a function as an argument. In this situation, we are unable to
14/// eliminate private memory in the caller unless inlined and end up with slow
15/// and expensive scratch access. Thus, we boost the inline threshold for such
16/// functions here.
17///
18//===----------------------------------------------------------------------===//
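//
// As a minimal illustration (not part of the original source; all names are
// hypothetical), consider a function that passes a private array to a helper:
//
//   void blur(float *row);   // receives a flat/private pointer
//   void kernel() {
//     float row[16];         // lives in private (scratch) memory
//     blur(row);             // 'row' escapes unless blur() is inlined
//   }
//
// Unless blur() is inlined, SROA cannot eliminate 'row', and every access
// goes through slow scratch memory; call sites like this one get a boosted
// threshold.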

#include "AMDGPU.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Inliner.h"

using namespace llvm;

#define DEBUG_TYPE "inline"

static cl::opt<int>
ArgAllocaCost("amdgpu-inline-arg-alloca-cost", cl::Hidden, cl::init(2200),
              cl::desc("Cost of alloca argument"));

// If the amount of scratch memory to eliminate exceeds our ability to
// allocate it into registers, we gain nothing by aggressively inlining
// functions for that heuristic.
static cl::opt<unsigned>
ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
                cl::desc("Maximum alloca size to use for inline cost"));

// Inliner constraint to achieve reasonable compilation time.
static cl::opt<size_t>
MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
      cl::desc("Maximum BB number allowed in a function after inlining"
               " (compile time constraint)"));

namespace {

class AMDGPUInliner : public LegacyInlinerBase {

public:
  AMDGPUInliner() : LegacyInlinerBase(ID) {
    initializeAMDGPUInlinerPass(*PassRegistry::getPassRegistry());
    Params = getInlineParams();
  }

  static char ID; // Pass identification, replacement for typeid

  unsigned getInlineThreshold(CallSite CS) const;

  InlineCost getInlineCost(CallSite CS) override;

  bool runOnSCC(CallGraphSCC &SCC) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  TargetTransformInfoWrapperPass *TTIWP;

  InlineParams Params;
};

} // end anonymous namespace

char AMDGPUInliner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUInliner, "amdgpu-inline",
                      "AMDGPU Function Integration/Inlining", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUInliner, "amdgpu-inline",
                    "AMDGPU Function Integration/Inlining", false, false)

Pass *llvm::createAMDGPUFunctionInliningPass() { return new AMDGPUInliner(); }
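// Note: the target is expected to install this pass as the pipeline's inliner
// via the factory above (in-tree this is done from AMDGPUTargetMachine;
// mentioned here for context).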

bool AMDGPUInliner::runOnSCC(CallGraphSCC &SCC) {
  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
  return LegacyInlinerBase::runOnSCC(SCC);
}

void AMDGPUInliner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetTransformInfoWrapperPass>();
  LegacyInlinerBase::getAnalysisUsage(AU);
}

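// Compute the inline threshold for the given call site: start from the
// default threshold, apply the inlinehint multiplier when allowed, and add
// ArgAllocaCost if a pointer into a reasonably sized static private alloca
// is passed to the callee.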
unsigned AMDGPUInliner::getInlineThreshold(CallSite CS) const {
  int Thres = Params.DefaultThreshold;

  Function *Caller = CS.getCaller();
  // Listen to the inlinehint attribute when it would increase the threshold
  // and the caller does not need to minimize its size.
  Function *Callee = CS.getCalledFunction();
  bool InlineHint = Callee && !Callee->isDeclaration() &&
                    Callee->hasFnAttribute(Attribute::InlineHint);
  if (InlineHint && Params.HintThreshold && Params.HintThreshold > Thres
      && !Caller->hasFnAttribute(Attribute::MinSize))
    Thres = Params.HintThreshold.getValue() *
            TTIWP->getTTI(*Callee).getInliningThresholdMultiplier();

  const DataLayout &DL = Caller->getParent()->getDataLayout();
  if (!Callee)
    return (unsigned)Thres;

  // If we have a pointer to a private array passed into a function, it will
  // not be optimized out, leaving scratch usage. Increase the inline
  // threshold to allow inlining in this case.
  uint64_t AllocaSize = 0;
  SmallPtrSet<const AllocaInst *, 8> AIVisited;
  for (Value *PtrArg : CS.args()) {
    PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
    if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
                Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
      continue;

    // Strip casts and GEPs to inspect the underlying allocation.
    PtrArg = GetUnderlyingObject(PtrArg, DL);
    if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
      if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
        continue;
      AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
      // If the amount of stack memory is excessive, we will not be able to
      // get rid of the scratch anyway; bail out.
      if (AllocaSize > ArgAllocaCutoff) {
        AllocaSize = 0;
        break;
      }
    }
  }
  if (AllocaSize)
    Thres += ArgAllocaCost;

  return (unsigned)Thres;
}

// Check whether a call is just a wrapper around another call.
// In this case we only have call and ret instructions.
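//
// For illustration, the detected shape looks like (IR names hypothetical):
//
//   define float @wrap(float %x) {
//   entry:
//     %r = call float @impl(float %x)
//     ret float %r
//   }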
static bool isWrapperOnlyCall(CallSite CS) {
  Function *Callee = CS.getCalledFunction();
  if (!Callee || Callee->size() != 1)
    return false;
  const BasicBlock &BB = Callee->getEntryBlock();
  if (const Instruction *I = BB.getFirstNonPHI()) {
    if (!isa<CallInst>(I)) {
      return false;
    }
    if (isa<ReturnInst>(*std::next(I->getIterator()))) {
      LLVM_DEBUG(dbgs() << "    Wrapper only call detected: "
                        << Callee->getName() << '\n');
      return true;
    }
  }
  return false;
}

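// Decide the inline cost of a call site: never inline undefined, noinline,
// or TTI-incompatible callees; always inline viable alwaysinline callees and
// trivial wrappers; otherwise defer to the generic cost model with the
// AMDGPU-adjusted threshold, rejecting candidates that would push the caller
// past the MaxBB compile-time limit.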
InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  if (!Callee || Callee->isDeclaration())
    return llvm::InlineCost::getNever("undefined callee");

  if (CS.isNoInline())
    return llvm::InlineCost::getNever("noinline");

  TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
  if (!TTI.areInlineCompatible(Caller, Callee))
    return llvm::InlineCost::getNever("incompatible");

  if (CS.hasFnAttr(Attribute::AlwaysInline)) {
    auto IsViable = isInlineViable(*Callee);
    if (IsViable)
      return llvm::InlineCost::getAlways("alwaysinline viable");
    return llvm::InlineCost::getNever(IsViable.message);
  }

  if (isWrapperOnlyCall(CS))
    return llvm::InlineCost::getAlways("wrapper-only call");

  InlineParams LocalParams = Params;
  LocalParams.DefaultThreshold = (int)getInlineThreshold(CS);

  // Check whether optimization remarks are enabled by constructing a probe
  // remark against the caller's first block.
  bool RemarksEnabled = false;
  const auto &BBs = Caller->getBasicBlockList();
  if (!BBs.empty()) {
    auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
    if (DI.isEnabled())
      RemarksEnabled = true;
  }

  OptimizationRemarkEmitter ORE(Caller);
  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [this](Function &F) -> AssumptionCache & {
    return ACT->getAssumptionCache(F);
  };

  auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
                                LocalParams, TTI, GetAssumptionCache, None, PSI,
                                RemarksEnabled ? &ORE : nullptr);

  if (IC && !IC.isAlways()) {
    // Inlining a single-BB callee does not increase the caller's total BB
    // count, thus subtract 1.
    size_t Size = Caller->size() + Callee->size() - 1;
    if (MaxBB && Size > MaxBB)
      return llvm::InlineCost::getNever("max number of bb exceeded");
  }
  return IC;
}
Stanislav Mekhanoshin5670e6d2017-09-20 04:25:58 +0000228}