| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 1 | //===-- AMDGPUAlwaysInlinePass.cpp - Force Inlining -----------------------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | /// \file | 
|  | 11 | /// This pass replaces uses of function aliases with the aliased function and | 
| Alfred Huang | f9b521f | 2017-06-15 23:02:55 +0000 | [diff] [blame] | 12 | /// marks functions as always_inline (or noinline when stress-testing calls). | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 13 | // | 
|  | 14 | //===----------------------------------------------------------------------===// | 
|  | 15 |  | 
|  | 16 | #include "AMDGPU.h" | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 17 | #include "AMDGPUTargetMachine.h" | 
|  | 18 | #include "Utils/AMDGPUBaseInfo.h" | 
|  | 19 | #include "llvm/ADT/SmallPtrSet.h" | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 20 | #include "llvm/IR/Module.h" | 
|  | 21 | #include "llvm/Transforms/Utils/Cloning.h" | 
|  | 22 |  | 
|  | 23 | using namespace llvm; | 
|  | 24 |  | 
|  | 25 | namespace { | 
|  | 26 |  | 
| Matt Arsenault | 1390af2 | 2017-09-21 07:00:48 +0000 | [diff] [blame] | 27 | static cl::opt<bool> StressCalls( | 
|  | 28 | "amdgpu-stress-function-calls", | 
|  | 29 | cl::Hidden, | 
|  | 30 | cl::desc("Force all functions to be noinline"), | 
|  | 31 | cl::init(false)); | 
|  | 32 |  | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 33 | class AMDGPUAlwaysInline : public ModulePass { | 
| Stanislav Mekhanoshin | 89653df | 2017-03-30 20:16:02 +0000 | [diff] [blame] | 34 | bool GlobalOpt; | 
|  | 35 |  | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 36 | void recursivelyVisitUsers(GlobalValue &GV, | 
|  | 37 | SmallPtrSetImpl<Function *> &FuncsToAlwaysInline); | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 38 | public: | 
| Matt Arsenault | 746e065 | 2017-06-02 18:02:42 +0000 | [diff] [blame] | 39 | static char ID; | 
|  | 40 |  | 
|  | 41 | AMDGPUAlwaysInline(bool GlobalOpt = false) : | 
|  | 42 | ModulePass(ID), GlobalOpt(GlobalOpt) { } | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 43 | bool runOnModule(Module &M) override; | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 44 |  | 
|  | 45 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|  | 46 | AU.setPreservesAll(); | 
|  | 47 | } | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 48 | }; | 
|  | 49 |  | 
|  | 50 | } // End anonymous namespace | 
|  | 51 |  | 
| Matt Arsenault | 746e065 | 2017-06-02 18:02:42 +0000 | [diff] [blame] | 52 | INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", | 
|  | 53 | "AMDGPU Inline All Functions", false, false) | 
|  | 54 |  | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 55 | char AMDGPUAlwaysInline::ID = 0; | 
|  | 56 |  | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 57 | void AMDGPUAlwaysInline::recursivelyVisitUsers( | 
|  | 58 | GlobalValue &GV, | 
|  | 59 | SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { | 
|  | 60 | SmallVector<User *, 16> Stack; | 
|  | 61 |  | 
|  | 62 | SmallPtrSet<const Value *, 8> Visited; | 
|  | 63 |  | 
|  | 64 | for (User *U : GV.users()) | 
|  | 65 | Stack.push_back(U); | 
|  | 66 |  | 
|  | 67 | while (!Stack.empty()) { | 
|  | 68 | User *U = Stack.pop_back_val(); | 
|  | 69 | if (!Visited.insert(U).second) | 
|  | 70 | continue; | 
|  | 71 |  | 
|  | 72 | if (Instruction *I = dyn_cast<Instruction>(U)) { | 
|  | 73 | Function *F = I->getParent()->getParent(); | 
|  | 74 | if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { | 
|  | 75 | FuncsToAlwaysInline.insert(F); | 
|  | 76 | Stack.push_back(F); | 
|  | 77 | } | 
|  | 78 |  | 
|  | 79 | // No need to look at further users, but we do need to inline any callers. | 
|  | 80 | continue; | 
|  | 81 | } | 
|  | 82 |  | 
|  | 83 | for (User *UU : U->users()) | 
|  | 84 | Stack.push_back(UU); | 
|  | 85 | } | 
|  | 86 | } | 
|  | 87 |  | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 88 | bool AMDGPUAlwaysInline::runOnModule(Module &M) { | 
| Nikolay Haustov | eba8089 | 2016-08-31 11:18:33 +0000 | [diff] [blame] | 89 | std::vector<GlobalAlias*> AliasesToRemove; | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 90 |  | 
|  | 91 | SmallPtrSet<Function *, 8> FuncsToAlwaysInline; | 
|  | 92 | SmallPtrSet<Function *, 8> FuncsToNoInline; | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 93 |  | 
| Nikolay Haustov | eba8089 | 2016-08-31 11:18:33 +0000 | [diff] [blame] | 94 | for (GlobalAlias &A : M.aliases()) { | 
|  | 95 | if (Function* F = dyn_cast<Function>(A.getAliasee())) { | 
|  | 96 | A.replaceAllUsesWith(F); | 
|  | 97 | AliasesToRemove.push_back(&A); | 
|  | 98 | } | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 99 |  | 
|  | 100 | // FIXME: If the aliasee isn't a function, it's some kind of constant expr | 
|  | 101 | // cast that won't be inlined through. | 
| Nikolay Haustov | eba8089 | 2016-08-31 11:18:33 +0000 | [diff] [blame] | 102 | } | 
|  | 103 |  | 
| Stanislav Mekhanoshin | 89653df | 2017-03-30 20:16:02 +0000 | [diff] [blame] | 104 | if (GlobalOpt) { | 
|  | 105 | for (GlobalAlias* A : AliasesToRemove) { | 
|  | 106 | A->eraseFromParent(); | 
|  | 107 | } | 
| Nikolay Haustov | eba8089 | 2016-08-31 11:18:33 +0000 | [diff] [blame] | 108 | } | 
|  | 109 |  | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 110 | // Always force inlining of any function that uses an LDS global address. This | 
|  | 111 | // is something of a workaround because we don't have a way of supporting LDS | 
|  | 112 | // objects defined in functions. LDS is always allocated by a kernel, and it | 
|  | 113 | // is difficult to manage LDS usage if a function may be used by multiple | 
|  | 114 | // kernels. | 
|  | 115 | // | 
|  | 116 | // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this | 
|  | 117 | // should only appear when IPO passes manages to move LDs defined in a kernel | 
|  | 118 | // into a single user function. | 
| Matt Arsenault | 1390af2 | 2017-09-21 07:00:48 +0000 | [diff] [blame] | 119 |  | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 120 | for (GlobalVariable &GV : M.globals()) { | 
|  | 121 | // TODO: Region address | 
|  | 122 | unsigned AS = GV.getType()->getAddressSpace(); | 
| Matt Arsenault | 0da6350 | 2018-08-31 05:49:54 +0000 | [diff] [blame] | 123 | if (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS) | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 124 | continue; | 
|  | 125 |  | 
|  | 126 | recursivelyVisitUsers(GV, FuncsToAlwaysInline); | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 127 | } | 
|  | 128 |  | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 129 | if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { | 
|  | 130 | auto IncompatAttr | 
|  | 131 | = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 132 |  | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 133 | for (Function &F : M) { | 
|  | 134 | if (!F.isDeclaration() && !F.use_empty() && | 
|  | 135 | !F.hasFnAttribute(IncompatAttr)) { | 
|  | 136 | if (StressCalls) { | 
|  | 137 | if (!FuncsToAlwaysInline.count(&F)) | 
|  | 138 | FuncsToNoInline.insert(&F); | 
|  | 139 | } else | 
|  | 140 | FuncsToAlwaysInline.insert(&F); | 
|  | 141 | } | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 142 | } | 
|  | 143 | } | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 144 |  | 
|  | 145 | for (Function *F : FuncsToAlwaysInline) | 
|  | 146 | F->addFnAttr(Attribute::AlwaysInline); | 
|  | 147 |  | 
|  | 148 | for (Function *F : FuncsToNoInline) | 
|  | 149 | F->addFnAttr(Attribute::NoInline); | 
|  | 150 |  | 
|  | 151 | return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 152 | } | 
|  | 153 |  | 
| Stanislav Mekhanoshin | 89653df | 2017-03-30 20:16:02 +0000 | [diff] [blame] | 154 | ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { | 
|  | 155 | return new AMDGPUAlwaysInline(GlobalOpt); | 
| Tom Stellard | 5cbb53c | 2014-11-03 19:49:05 +0000 | [diff] [blame] | 156 | } | 
| Matt Arsenault | a680199 | 2018-07-10 14:03:41 +0000 | [diff] [blame] | 157 |  |