blob: 641405aa5b430004f397fbd6c7cf900b4ee6637a [file] [log] [blame]
//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//
14
15#include "CGCUDARuntime.h"
Peter Collingbournefa4d6032011-10-06 18:51:56 +000016#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "clang/AST/Decl.h"
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000019#include "clang/CodeGen/ConstantInitBuilder.h"
Chandler Carruthffd55512013-01-02 11:45:17 +000020#include "llvm/IR/BasicBlock.h"
Chandler Carruthc80ceea2014-03-04 11:02:08 +000021#include "llvm/IR/CallSite.h"
Chandler Carruthffd55512013-01-02 11:45:17 +000022#include "llvm/IR/Constants.h"
23#include "llvm/IR/DerivedTypes.h"
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000024#include "llvm/Support/Format.h"
Peter Collingbournefe883422011-10-06 18:29:37 +000025
26using namespace clang;
27using namespace CodeGen;
28
29namespace {
30
31class CGNVCUDARuntime : public CGCUDARuntime {
Peter Collingbournefa4d6032011-10-06 18:51:56 +000032
33private:
John McCall6c9f1fdb2016-11-19 08:17:24 +000034 llvm::IntegerType *IntTy, *SizeTy;
35 llvm::Type *VoidTy;
Artem Belevich52cc4872015-05-07 19:34:16 +000036 llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
37
38 /// Convenience reference to LLVM Context
39 llvm::LLVMContext &Context;
40 /// Convenience reference to the current module
41 llvm::Module &TheModule;
42 /// Keeps track of kernel launch stubs emitted in this module
43 llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
Artem Belevich42e19492016-03-02 18:28:50 +000044 llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
Jonas Hahnfelde7681322018-02-28 17:53:46 +000045 /// Keeps track of variable containing handle of GPU binary. Populated by
Artem Belevich52cc4872015-05-07 19:34:16 +000046 /// ModuleCtorFunction() and used to create corresponding cleanup calls in
47 /// ModuleDtorFunction()
Jonas Hahnfelde7681322018-02-28 17:53:46 +000048 llvm::GlobalVariable *GpuBinaryHandle = nullptr;
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000049 /// Whether we generate relocatable device code.
50 bool RelocatableDeviceCode;
Peter Collingbournefa4d6032011-10-06 18:51:56 +000051
52 llvm::Constant *getSetupArgumentFn() const;
53 llvm::Constant *getLaunchFn() const;
54
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000055 llvm::FunctionType *getRegisterGlobalsFnTy() const;
56 llvm::FunctionType *getCallbackFnTy() const;
57 llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
58
Artem Belevich52cc4872015-05-07 19:34:16 +000059 /// Creates a function to register all kernel stubs generated in this module.
Artem Belevich42e19492016-03-02 18:28:50 +000060 llvm::Function *makeRegisterGlobalsFn();
Artem Belevich52cc4872015-05-07 19:34:16 +000061
62 /// Helper function that generates a constant string and returns a pointer to
63 /// the start of the string. The result of this function can be used anywhere
64 /// where the C code specifies const char*.
65 llvm::Constant *makeConstantString(const std::string &Str,
66 const std::string &Name = "",
Artem Belevich4c093182016-08-12 18:44:01 +000067 const std::string &SectionName = "",
Artem Belevich52cc4872015-05-07 19:34:16 +000068 unsigned Alignment = 0) {
69 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
70 llvm::ConstantInt::get(SizeTy, 0)};
John McCall7f416cc2015-09-08 08:05:57 +000071 auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
Artem Belevich4c093182016-08-12 18:44:01 +000072 llvm::GlobalVariable *GV =
73 cast<llvm::GlobalVariable>(ConstStr.getPointer());
74 if (!SectionName.empty())
75 GV->setSection(SectionName);
76 if (Alignment)
77 GV->setAlignment(Alignment);
78
John McCall7f416cc2015-09-08 08:05:57 +000079 return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
80 ConstStr.getPointer(), Zeros);
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +000081 }
82
83 /// Helper function that generates an empty dummy function returning void.
84 llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
85 assert(FnTy->getReturnType()->isVoidTy() &&
86 "Can only generate dummy functions returning void!");
87 llvm::Function *DummyFunc = llvm::Function::Create(
88 FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
89
90 llvm::BasicBlock *DummyBlock =
91 llvm::BasicBlock::Create(Context, "", DummyFunc);
92 CGBuilderTy FuncBuilder(CGM, Context);
93 FuncBuilder.SetInsertPoint(DummyBlock);
94 FuncBuilder.CreateRetVoid();
95
96 return DummyFunc;
97 }
Artem Belevich52cc4872015-05-07 19:34:16 +000098
99 void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
100
Peter Collingbournefe883422011-10-06 18:29:37 +0000101public:
102 CGNVCUDARuntime(CodeGenModule &CGM);
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000103
Artem Belevich52cc4872015-05-07 19:34:16 +0000104 void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
Artem Belevich42e19492016-03-02 18:28:50 +0000105 void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override {
106 DeviceVars.push_back(std::make_pair(&Var, Flags));
107 }
108
Artem Belevich52cc4872015-05-07 19:34:16 +0000109 /// Creates module constructor function
110 llvm::Function *makeModuleCtorFunction() override;
111 /// Creates module destructor function
112 llvm::Function *makeModuleDtorFunction() override;
Peter Collingbournefe883422011-10-06 18:29:37 +0000113};
114
Alexander Kornienkoab9db512015-06-22 23:07:51 +0000115}
Peter Collingbournefe883422011-10-06 18:29:37 +0000116
Artem Belevich52cc4872015-05-07 19:34:16 +0000117CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
118 : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000119 TheModule(CGM.getModule()),
120 RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000121 CodeGen::CodeGenTypes &Types = CGM.getTypes();
122 ASTContext &Ctx = CGM.getContext();
123
John McCall6c9f1fdb2016-11-19 08:17:24 +0000124 IntTy = CGM.IntTy;
125 SizeTy = CGM.SizeTy;
126 VoidTy = CGM.VoidTy;
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000127
128 CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
129 VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
Artem Belevich52cc4872015-05-07 19:34:16 +0000130 VoidPtrPtrTy = VoidPtrTy->getPointerTo();
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000131}
132
133llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
134 // cudaError_t cudaSetupArgument(void *, size_t, size_t)
Benjamin Kramer30934732016-07-02 11:41:41 +0000135 llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000136 return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
137 Params, false),
138 "cudaSetupArgument");
139}
140
141llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
142 // cudaError_t cudaLaunch(char *)
Artem Belevich52cc4872015-05-07 19:34:16 +0000143 return CGM.CreateRuntimeFunction(
144 llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000145}
146
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000147llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
148 return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
149}
150
151llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
152 return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
153}
154
155llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
156 auto CallbackFnTy = getCallbackFnTy();
157 auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
158 llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
159 VoidPtrTy, CallbackFnTy->getPointerTo()};
160 return llvm::FunctionType::get(VoidTy, Params, false);
161}
162
Artem Belevich52cc4872015-05-07 19:34:16 +0000163void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
164 FunctionArgList &Args) {
165 EmittedKernels.push_back(CGF.CurFn);
166 emitDeviceStubBody(CGF, Args);
167}
168
169void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000170 FunctionArgList &Args) {
Justin Lebare56360a2016-07-27 22:36:21 +0000171 // Emit a call to cudaSetupArgument for each arg in Args.
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000172 llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
Justin Lebare56360a2016-07-27 22:36:21 +0000173 llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
174 CharUnits Offset = CharUnits::Zero();
175 for (const VarDecl *A : Args) {
176 CharUnits TyWidth, TyAlign;
177 std::tie(TyWidth, TyAlign) =
178 CGM.getContext().getTypeInfoInChars(A->getType());
179 Offset = Offset.alignTo(TyAlign);
180 llvm::Value *Args[] = {
181 CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
182 VoidPtrTy),
183 llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()),
184 llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
185 };
John McCall882987f2013-02-28 19:01:20 +0000186 llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000187 llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
188 llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
Justin Lebare56360a2016-07-27 22:36:21 +0000189 llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000190 CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
191 CGF.EmitBlock(NextBlock);
Justin Lebare56360a2016-07-27 22:36:21 +0000192 Offset += TyWidth;
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000193 }
194
195 // Emit the call to cudaLaunch
196 llvm::Constant *cudaLaunchFn = getLaunchFn();
197 llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
John McCall882987f2013-02-28 19:01:20 +0000198 CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
Peter Collingbournefa4d6032011-10-06 18:51:56 +0000199 CGF.EmitBranch(EndBlock);
200
201 CGF.EmitBlock(EndBlock);
Peter Collingbournefe883422011-10-06 18:29:37 +0000202}
203
Artem Belevich42e19492016-03-02 18:28:50 +0000204/// Creates a function that sets up state on the host side for CUDA objects that
205/// have a presence on both the host and device sides. Specifically, registers
206/// the host side of kernel functions and device global variables with the CUDA
207/// runtime.
Artem Belevich52cc4872015-05-07 19:34:16 +0000208/// \code
Artem Belevich42e19492016-03-02 18:28:50 +0000209/// void __cuda_register_globals(void** GpuBinaryHandle) {
Artem Belevich52cc4872015-05-07 19:34:16 +0000210/// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
211/// ...
212/// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
Artem Belevich42e19492016-03-02 18:28:50 +0000213/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
214/// ...
215/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
Artem Belevich52cc4872015-05-07 19:34:16 +0000216/// }
217/// \endcode
Artem Belevich42e19492016-03-02 18:28:50 +0000218llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000219 // No need to register anything
220 if (EmittedKernels.empty() && DeviceVars.empty())
221 return nullptr;
222
Artem Belevich52cc4872015-05-07 19:34:16 +0000223 llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000224 getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
225 "__cuda_register_globals", &TheModule);
Artem Belevich52cc4872015-05-07 19:34:16 +0000226 llvm::BasicBlock *EntryBB =
227 llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
John McCall7f416cc2015-09-08 08:05:57 +0000228 CGBuilderTy Builder(CGM, Context);
Artem Belevich52cc4872015-05-07 19:34:16 +0000229 Builder.SetInsertPoint(EntryBB);
230
231 // void __cudaRegisterFunction(void **, const char *, char *, const char *,
232 // int, uint3*, uint3*, dim3*, dim3*, int*)
Benjamin Kramer6d1c10b2016-07-02 12:03:57 +0000233 llvm::Type *RegisterFuncParams[] = {
Artem Belevich52cc4872015-05-07 19:34:16 +0000234 VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
235 VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
236 llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
237 llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
238 "__cudaRegisterFunction");
239
240 // Extract GpuBinaryHandle passed as the first argument passed to
Artem Belevich42e19492016-03-02 18:28:50 +0000241 // __cuda_register_globals() and generate __cudaRegisterFunction() call for
Artem Belevich52cc4872015-05-07 19:34:16 +0000242 // each emitted kernel.
243 llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
244 for (llvm::Function *Kernel : EmittedKernels) {
245 llvm::Constant *KernelName = makeConstantString(Kernel->getName());
246 llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
Artem Belevich42e19492016-03-02 18:28:50 +0000247 llvm::Value *Args[] = {
Artem Belevich52cc4872015-05-07 19:34:16 +0000248 &GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy),
249 KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr,
250 NullPtr, NullPtr, NullPtr,
251 llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
Artem Belevich42e19492016-03-02 18:28:50 +0000252 Builder.CreateCall(RegisterFunc, Args);
253 }
254
255 // void __cudaRegisterVar(void **, char *, char *, const char *,
256 // int, int, int, int)
Benjamin Kramer6d1c10b2016-07-02 12:03:57 +0000257 llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
258 CharPtrTy, IntTy, IntTy,
259 IntTy, IntTy};
Artem Belevich42e19492016-03-02 18:28:50 +0000260 llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
261 llvm::FunctionType::get(IntTy, RegisterVarParams, false),
262 "__cudaRegisterVar");
263 for (auto &Pair : DeviceVars) {
264 llvm::GlobalVariable *Var = Pair.first;
265 unsigned Flags = Pair.second;
266 llvm::Constant *VarName = makeConstantString(Var->getName());
267 uint64_t VarSize =
268 CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
269 llvm::Value *Args[] = {
270 &GpuBinaryHandlePtr,
271 Builder.CreateBitCast(Var, VoidPtrTy),
272 VarName,
273 VarName,
274 llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
275 llvm::ConstantInt::get(IntTy, VarSize),
276 llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
277 llvm::ConstantInt::get(IntTy, 0)};
278 Builder.CreateCall(RegisterVar, Args);
Artem Belevich52cc4872015-05-07 19:34:16 +0000279 }
280
281 Builder.CreateRetVoid();
282 return RegisterKernelsFunc;
283}
284
285/// Creates a global constructor function for the module:
286/// \code
287/// void __cuda_module_ctor(void*) {
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000288/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
289/// __cuda_register_globals(Handle);
Artem Belevich52cc4872015-05-07 19:34:16 +0000290/// }
291/// \endcode
292llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000293 // No need to generate ctors/dtors if there is no GPU binary.
294 std::string GpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
295 if (GpuBinaryFileName.empty())
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000296 return nullptr;
297
Artem Belevich42e19492016-03-02 18:28:50 +0000298 // void __cuda_register_globals(void* handle);
299 llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000300 // We always need a function to pass in as callback. Create a dummy
301 // implementation if we don't need to register anything.
302 if (RelocatableDeviceCode && !RegisterGlobalsFunc)
303 RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
304
Artem Belevich52cc4872015-05-07 19:34:16 +0000305 // void ** __cudaRegisterFatBinary(void *);
306 llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
307 llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
308 "__cudaRegisterFatBinary");
309 // struct { int magic, int version, void * gpu_binary, void * dont_care };
310 llvm::StructType *FatbinWrapperTy =
Serge Guelton1d993272017-05-09 19:31:30 +0000311 llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
Artem Belevich52cc4872015-05-07 19:34:16 +0000312
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000313 // Register GPU binary with the CUDA runtime, store returned handle in a
314 // global variable and save a reference in GpuBinaryHandle to be cleaned up
315 // in destructor on exit. Then associate all known kernels with the GPU binary
316 // handle so CUDA runtime can figure out what to call on the GPU side.
317 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
318 llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
319 if (std::error_code EC = GpuBinaryOrErr.getError()) {
320 CGM.getDiags().Report(diag::err_cannot_open_file)
321 << GpuBinaryFileName << EC.message();
322 return nullptr;
323 }
324
Artem Belevich52cc4872015-05-07 19:34:16 +0000325 llvm::Function *ModuleCtorFunc = llvm::Function::Create(
326 llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
327 llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
328 llvm::BasicBlock *CtorEntryBB =
329 llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
John McCall7f416cc2015-09-08 08:05:57 +0000330 CGBuilderTy CtorBuilder(CGM, Context);
Artem Belevich52cc4872015-05-07 19:34:16 +0000331
332 CtorBuilder.SetInsertPoint(CtorEntryBB);
333
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000334 const char *FatbinConstantName;
335 if (RelocatableDeviceCode)
336 // TODO: Figure out how this is called on mac OS!
337 FatbinConstantName = "__nv_relfatbin";
338 else
339 FatbinConstantName =
340 CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000341 // NVIDIA's cuobjdump looks for fatbins in this section.
342 const char *FatbinSectionName =
343 CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000344 // TODO: Figure out how this is called on mac OS!
345 const char *NVModuleIDSectionName = "__nv_module_id";
Artem Belevich52cc4872015-05-07 19:34:16 +0000346
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000347 // Create initialized wrapper structure that points to the loaded GPU binary
348 ConstantInitBuilder Builder(CGM);
349 auto Values = Builder.beginStruct(FatbinWrapperTy);
350 // Fatbin wrapper magic.
351 Values.addInt(IntTy, 0x466243b1);
352 // Fatbin version.
353 Values.addInt(IntTy, 1);
354 // Data.
355 Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "",
356 FatbinConstantName, 8));
357 // Unused in fatbin v1.
358 Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
359 llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
360 "__cuda_fatbin_wrapper", CGM.getPointerAlign(),
361 /*constant*/ true);
362 FatbinWrapper->setSection(FatbinSectionName);
Justin Lebard14fe882016-11-18 00:41:31 +0000363
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000364 // Register binary with CUDA runtime. This is substantially different in
365 // default mode vs. separate compilation!
366 if (!RelocatableDeviceCode) {
367 // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
368 llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
369 RegisterFatbinFunc,
370 CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
371 GpuBinaryHandle = new llvm::GlobalVariable(
372 TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
373 llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
374 CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
375 CGM.getPointerAlign());
Artem Belevich52cc4872015-05-07 19:34:16 +0000376
Jonas Hahnfeldf5527c22018-04-20 13:04:45 +0000377 // Call __cuda_register_globals(GpuBinaryHandle);
378 if (RegisterGlobalsFunc)
379 CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
380 } else {
381 // Generate a unique module ID.
382 SmallString<64> NVModuleID;
383 llvm::raw_svector_ostream OS(NVModuleID);
384 OS << "__nv_" << llvm::format("%x", FatbinWrapper->getGUID());
385 llvm::Constant *NVModuleIDConstant =
386 makeConstantString(NVModuleID.str(), "", NVModuleIDSectionName, 32);
387
388 // Create an alias for the FatbinWrapper that nvcc will look for.
389 llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
390 Twine("__fatbinwrap") + NVModuleID,
391 FatbinWrapper);
392
393 // void __cudaRegisterLinkedBinary%NVModuleID%(void (*)(void *), void *,
394 // void *, void (*)(void **))
395 SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
396 RegisterLinkedBinaryName += NVModuleID;
397 llvm::Constant *RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
398 getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
399
400 assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
401 llvm::Value *Args[] = {RegisterGlobalsFunc,
402 CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
403 NVModuleIDConstant,
404 makeDummyFunction(getCallbackFnTy())};
405 CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
406 }
Artem Belevich52cc4872015-05-07 19:34:16 +0000407
408 CtorBuilder.CreateRetVoid();
409 return ModuleCtorFunc;
410}
411
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000412/// Creates a global destructor function that unregisters the GPU code blob
Artem Belevich52cc4872015-05-07 19:34:16 +0000413/// registered by constructor.
414/// \code
415/// void __cuda_module_dtor(void*) {
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000416/// __cudaUnregisterFatBinary(Handle);
Artem Belevich52cc4872015-05-07 19:34:16 +0000417/// }
418/// \endcode
419llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000420 // No need for destructor if we don't have a handle to unregister.
421 if (!GpuBinaryHandle)
Artem Belevich8c1ec1e2016-03-02 18:28:53 +0000422 return nullptr;
423
Artem Belevich52cc4872015-05-07 19:34:16 +0000424 // void __cudaUnregisterFatBinary(void ** handle);
425 llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
426 llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
427 "__cudaUnregisterFatBinary");
428
429 llvm::Function *ModuleDtorFunc = llvm::Function::Create(
430 llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
431 llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
432 llvm::BasicBlock *DtorEntryBB =
433 llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
John McCall7f416cc2015-09-08 08:05:57 +0000434 CGBuilderTy DtorBuilder(CGM, Context);
Artem Belevich52cc4872015-05-07 19:34:16 +0000435 DtorBuilder.SetInsertPoint(DtorEntryBB);
436
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000437 auto HandleValue =
John McCall7f416cc2015-09-08 08:05:57 +0000438 DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
Jonas Hahnfelde7681322018-02-28 17:53:46 +0000439 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
Artem Belevich52cc4872015-05-07 19:34:16 +0000440
441 DtorBuilder.CreateRetVoid();
442 return ModuleDtorFunc;
443}
444
Peter Collingbournefe883422011-10-06 18:29:37 +0000445CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
446 return new CGNVCUDARuntime(CGM);
447}