| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 1 | //===- AMDGPUOpenCLEnqueuedBlockLowering.cpp - Lower enqueued block -------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | // \file | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 11 | // This post-linking pass replaces the function pointer of enqueued | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 12 | // block kernel with a global variable (runtime handle) and adds | 
|  | 13 | // "runtime-handle" attribute to the enqueued block kernel. | 
|  | 14 | // | 
|  | 15 | // In LLVM CodeGen the runtime-handle metadata will be translated to | 
|  | 16 | // RuntimeHandle metadata in code object. Runtime allocates a global buffer | 
|  | 17 | // for each kernel with RuntimeHandle metadata and saves the kernel address | 
|  | 18 | // required for the AQL packet into the buffer. __enqueue_kernel function | 
|  | 19 | // in device library knows that the invoke function pointer in the block | 
|  | 20 | // literal is actually runtime handle and loads the kernel address from it | 
|  | 21 | // and puts it into the AQL packet for dispatching. | 
|  | 22 | // | 
|  | 23 | // This cannot be done in FE since FE cannot create a unique global variable | 
|  | 24 | // with external linkage across LLVM modules. The global variable with internal | 
|  | 25 | // linkage does not work since optimization passes will try to replace loads | 
|  | 26 | // of the global variable with its initialization value. | 
|  | 27 | // | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 28 | // It also identifies the kernels that directly or indirectly enqueue kernels | 
|  | 29 | // and adds "calls-enqueue-kernel" function attribute to them, which will | 
|  | 30 | // be used to determine whether to emit runtime metadata for the kernel | 
|  | 31 | // enqueue related hidden kernel arguments. | 
|  | 32 | // | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 33 | //===----------------------------------------------------------------------===// | 
|  | 34 |  | 
|  | 35 | #include "AMDGPU.h" | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 36 | #include "llvm/ADT/DenseSet.h" | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 37 | #include "llvm/ADT/StringRef.h" | 
|  | 38 | #include "llvm/IR/Constants.h" | 
| Yaxun Liu | fb17bf6 | 2018-06-13 17:31:51 +0000 | [diff] [blame] | 39 | #include "llvm/IR/DerivedTypes.h" | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 40 | #include "llvm/IR/Instructions.h" | 
| Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 41 | #include "llvm/IR/Mangler.h" | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 42 | #include "llvm/IR/Module.h" | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 43 | #include "llvm/IR/User.h" | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 44 | #include "llvm/Pass.h" | 
|  | 45 | #include "llvm/Support/Debug.h" | 
|  | 46 | #include "llvm/Support/raw_ostream.h" | 
|  | 47 |  | 
|  | 48 | #define DEBUG_TYPE "amdgpu-lower-enqueued-block" | 
|  | 49 |  | 
|  | 50 | using namespace llvm; | 
|  | 51 |  | 
|  | 52 | namespace { | 
|  | 53 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 54 | /// Lower enqueued blocks. | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 55 | class AMDGPUOpenCLEnqueuedBlockLowering : public ModulePass { | 
|  | 56 | public: | 
|  | 57 | static char ID; | 
|  | 58 |  | 
|  | 59 | explicit AMDGPUOpenCLEnqueuedBlockLowering() : ModulePass(ID) {} | 
|  | 60 |  | 
|  | 61 | private: | 
|  | 62 | bool runOnModule(Module &M) override; | 
|  | 63 | }; | 
|  | 64 |  | 
|  | 65 | } // end anonymous namespace | 
|  | 66 |  | 
|  | 67 | char AMDGPUOpenCLEnqueuedBlockLowering::ID = 0; | 
|  | 68 |  | 
|  | 69 | char &llvm::AMDGPUOpenCLEnqueuedBlockLoweringID = | 
|  | 70 | AMDGPUOpenCLEnqueuedBlockLowering::ID; | 
|  | 71 |  | 
|  | 72 | INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLowering, DEBUG_TYPE, | 
|  | 73 | "Lower OpenCL enqueued blocks", false, false) | 
|  | 74 |  | 
|  | 75 | ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() { | 
|  | 76 | return new AMDGPUOpenCLEnqueuedBlockLowering(); | 
|  | 77 | } | 
|  | 78 |  | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 79 | /// Collect direct or indrect callers of \p F and save them | 
|  | 80 | /// to \p Callers. | 
|  | 81 | static void collectCallers(Function *F, DenseSet<Function *> &Callers) { | 
|  | 82 | for (auto U : F->users()) { | 
|  | 83 | if (auto *CI = dyn_cast<CallInst>(&*U)) { | 
|  | 84 | auto *Caller = CI->getParent()->getParent(); | 
| Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame] | 85 | if (Callers.insert(Caller).second) | 
|  | 86 | collectCallers(Caller, Callers); | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 87 | } | 
|  | 88 | } | 
|  | 89 | } | 
|  | 90 |  | 
| Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame] | 91 | /// If \p U is instruction or constant, collect functions which directly or | 
|  | 92 | /// indirectly use it. | 
|  | 93 | static void collectFunctionUsers(User *U, DenseSet<Function *> &Funcs) { | 
|  | 94 | if (auto *I = dyn_cast<Instruction>(U)) { | 
|  | 95 | auto *F = I->getParent()->getParent(); | 
|  | 96 | if (Funcs.insert(F).second) | 
|  | 97 | collectCallers(F, Funcs); | 
|  | 98 | return; | 
|  | 99 | } | 
|  | 100 | if (!isa<Constant>(U)) | 
|  | 101 | return; | 
|  | 102 | for (auto UU : U->users()) | 
|  | 103 | collectFunctionUsers(&*UU, Funcs); | 
|  | 104 | } | 
|  | 105 |  | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 106 | bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) { | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 107 | DenseSet<Function *> Callers; | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 108 | auto &C = M.getContext(); | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 109 | bool Changed = false; | 
|  | 110 | for (auto &F : M.functions()) { | 
|  | 111 | if (F.hasFnAttribute("enqueued-block")) { | 
| Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 112 | if (!F.hasName()) { | 
|  | 113 | SmallString<64> Name; | 
|  | 114 | Mangler::getNameWithPrefix(Name, "__amdgpu_enqueued_kernel", | 
|  | 115 | M.getDataLayout()); | 
|  | 116 | F.setName(Name); | 
|  | 117 | } | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 118 | LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n'); | 
| Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 119 | auto RuntimeHandle = (F.getName() + ".runtime_handle").str(); | 
| Yaxun Liu | fb17bf6 | 2018-06-13 17:31:51 +0000 | [diff] [blame] | 120 | auto T = ArrayType::get(Type::getInt64Ty(C), 2); | 
| Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 121 | auto *GV = new GlobalVariable( | 
| Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame] | 122 | M, T, | 
|  | 123 | /*IsConstant=*/false, GlobalValue::ExternalLinkage, | 
|  | 124 | /*Initializer=*/Constant::getNullValue(T), RuntimeHandle, | 
|  | 125 | /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, | 
|  | 126 | AMDGPUAS::GLOBAL_ADDRESS, | 
|  | 127 | /*IsExternallyInitialized=*/false); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 128 | LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n'); | 
| Yaxun Liu | a99e7d8 | 2018-03-12 16:34:06 +0000 | [diff] [blame] | 129 |  | 
| Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 130 | for (auto U : F.users()) { | 
| Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame] | 131 | auto *UU = &*U; | 
|  | 132 | if (!isa<ConstantExpr>(UU)) | 
| Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 133 | continue; | 
| Yaxun Liu | 9381ae9 | 2018-04-11 14:46:15 +0000 | [diff] [blame] | 134 | collectFunctionUsers(UU, Callers); | 
|  | 135 | auto *BitCast = cast<ConstantExpr>(UU); | 
| Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 136 | auto *NewPtr = ConstantExpr::getPointerCast(GV, BitCast->getType()); | 
|  | 137 | BitCast->replaceAllUsesWith(NewPtr); | 
|  | 138 | F.addFnAttr("runtime-handle", RuntimeHandle); | 
|  | 139 | F.setLinkage(GlobalValue::ExternalLinkage); | 
| Yaxun Liu | 46439e8 | 2018-03-06 16:04:39 +0000 | [diff] [blame] | 140 | Changed = true; | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 141 | } | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 142 | } | 
|  | 143 | } | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 144 |  | 
|  | 145 | for (auto F : Callers) { | 
|  | 146 | if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL) | 
|  | 147 | continue; | 
|  | 148 | F->addFnAttr("calls-enqueue-kernel"); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 149 | LLVM_DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n'); | 
| Yaxun Liu | c928f2a | 2017-10-30 14:30:28 +0000 | [diff] [blame] | 150 | } | 
| Yaxun Liu | de4b88d | 2017-10-10 19:39:48 +0000 | [diff] [blame] | 151 | return Changed; | 
|  | 152 | } |