|  | //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | /// \file | 
|  | /// This pass marks all internal functions as always_inline and creates | 
|  | /// duplicates of all other functions and marks the duplicates as always_inline. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "AMDGPU.h" | 
|  | #include "AMDGPUTargetMachine.h" | 
|  | #include "Utils/AMDGPUBaseInfo.h" | 
|  | #include "llvm/ADT/SmallPtrSet.h" | 
|  | #include "llvm/IR/Module.h" | 
|  | #include "llvm/Transforms/Utils/Cloning.h" | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | static cl::opt<bool> StressCalls( | 
|  | "amdgpu-stress-function-calls", | 
|  | cl::Hidden, | 
|  | cl::desc("Force all functions to be noinline"), | 
|  | cl::init(false)); | 
|  |  | 
|  | class AMDGPUAlwaysInline : public ModulePass { | 
|  | bool GlobalOpt; | 
|  |  | 
|  | void recursivelyVisitUsers(GlobalValue &GV, | 
|  | SmallPtrSetImpl<Function *> &FuncsToAlwaysInline); | 
|  | public: | 
|  | static char ID; | 
|  |  | 
|  | AMDGPUAlwaysInline(bool GlobalOpt = false) : | 
|  | ModulePass(ID), GlobalOpt(GlobalOpt) { } | 
|  | bool runOnModule(Module &M) override; | 
|  |  | 
|  | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | AU.setPreservesAll(); | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // End anonymous namespace | 
|  |  | 
|  | INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", | 
|  | "AMDGPU Inline All Functions", false, false) | 
|  |  | 
|  | char AMDGPUAlwaysInline::ID = 0; | 
|  |  | 
|  | void AMDGPUAlwaysInline::recursivelyVisitUsers( | 
|  | GlobalValue &GV, | 
|  | SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { | 
|  | SmallVector<User *, 16> Stack; | 
|  |  | 
|  | SmallPtrSet<const Value *, 8> Visited; | 
|  |  | 
|  | for (User *U : GV.users()) | 
|  | Stack.push_back(U); | 
|  |  | 
|  | while (!Stack.empty()) { | 
|  | User *U = Stack.pop_back_val(); | 
|  | if (!Visited.insert(U).second) | 
|  | continue; | 
|  |  | 
|  | if (Instruction *I = dyn_cast<Instruction>(U)) { | 
|  | Function *F = I->getParent()->getParent(); | 
|  | if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { | 
|  | FuncsToAlwaysInline.insert(F); | 
|  | Stack.push_back(F); | 
|  | } | 
|  |  | 
|  | // No need to look at further users, but we do need to inline any callers. | 
|  | continue; | 
|  | } | 
|  |  | 
|  | for (User *UU : U->users()) | 
|  | Stack.push_back(UU); | 
|  | } | 
|  | } | 
|  |  | 
|  | bool AMDGPUAlwaysInline::runOnModule(Module &M) { | 
|  | std::vector<GlobalAlias*> AliasesToRemove; | 
|  |  | 
|  | SmallPtrSet<Function *, 8> FuncsToAlwaysInline; | 
|  | SmallPtrSet<Function *, 8> FuncsToNoInline; | 
|  |  | 
|  | for (GlobalAlias &A : M.aliases()) { | 
|  | if (Function* F = dyn_cast<Function>(A.getAliasee())) { | 
|  | A.replaceAllUsesWith(F); | 
|  | AliasesToRemove.push_back(&A); | 
|  | } | 
|  |  | 
|  | // FIXME: If the aliasee isn't a function, it's some kind of constant expr | 
|  | // cast that won't be inlined through. | 
|  | } | 
|  |  | 
|  | if (GlobalOpt) { | 
|  | for (GlobalAlias* A : AliasesToRemove) { | 
|  | A->eraseFromParent(); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Always force inlining of any function that uses an LDS global address. This | 
|  | // is something of a workaround because we don't have a way of supporting LDS | 
|  | // objects defined in functions. LDS is always allocated by a kernel, and it | 
|  | // is difficult to manage LDS usage if a function may be used by multiple | 
|  | // kernels. | 
|  | // | 
|  | // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this | 
|  | // should only appear when IPO passes manages to move LDs defined in a kernel | 
|  | // into a single user function. | 
|  |  | 
|  | for (GlobalVariable &GV : M.globals()) { | 
|  | // TODO: Region address | 
|  | unsigned AS = GV.getType()->getAddressSpace(); | 
|  | if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS) | 
|  | continue; | 
|  |  | 
|  | recursivelyVisitUsers(GV, FuncsToAlwaysInline); | 
|  | } | 
|  |  | 
|  | if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { | 
|  | auto IncompatAttr | 
|  | = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; | 
|  |  | 
|  | for (Function &F : M) { | 
|  | if (!F.isDeclaration() && !F.use_empty() && | 
|  | !F.hasFnAttribute(IncompatAttr)) { | 
|  | if (StressCalls) { | 
|  | if (!FuncsToAlwaysInline.count(&F)) | 
|  | FuncsToNoInline.insert(&F); | 
|  | } else | 
|  | FuncsToAlwaysInline.insert(&F); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | for (Function *F : FuncsToAlwaysInline) | 
|  | F->addFnAttr(Attribute::AlwaysInline); | 
|  |  | 
|  | for (Function *F : FuncsToNoInline) | 
|  | F->addFnAttr(Attribute::NoInline); | 
|  |  | 
|  | return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); | 
|  | } | 
|  |  | 
|  | ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { | 
|  | return new AMDGPUAlwaysInline(GlobalOpt); | 
|  | } | 
|  |  |