Blame - clang/lib/CodeGen/CGCUDANV.cpp - toolchain/llvm-project

blob: c6788091cace75c6764280f076e9ac215261482c [file] [log] [blame]

Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	1	//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This provides a class for CUDA code generation targeting the NVIDIA CUDA
				11	// runtime library.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "CGCUDARuntime.h"
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	16	#include "CodeGenFunction.h"
				17	#include "CodeGenModule.h"
				18	#include "clang/AST/Decl.h"
Chandler Carruth	ffd5551	2013-01-02 11:45:17 +0000	[diff] [blame]	19	#include "llvm/IR/BasicBlock.h"
Chandler Carruth	c80ceea	2014-03-04 11:02:08 +0000	[diff] [blame]	20	#include "llvm/IR/CallSite.h"
Chandler Carruth	ffd5551	2013-01-02 11:45:17 +0000	[diff] [blame]	21	#include "llvm/IR/Constants.h"
				22	#include "llvm/IR/DerivedTypes.h"
Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	23
				24	using namespace clang;
				25	using namespace CodeGen;
				26
				27	namespace {
				28
				29	class CGNVCUDARuntime : public CGCUDARuntime {
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	30
				31	private:
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	32	llvm::Type IntTy, SizeTy, *VoidTy;
				33	llvm::PointerType CharPtrTy, VoidPtrTy, *VoidPtrPtrTy;
				34
				35	/// Convenience reference to LLVM Context
				36	llvm::LLVMContext &Context;
				37	/// Convenience reference to the current module
				38	llvm::Module &TheModule;
				39	/// Keeps track of kernel launch stubs emitted in this module
				40	llvm::SmallVector<llvm::Function *, 16> EmittedKernels;
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	41	llvm::SmallVector<std::pair<llvm::GlobalVariable *, unsigned>, 16> DeviceVars;
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	42	/// Keeps track of variables containing handles of GPU binaries. Populated by
				43	/// ModuleCtorFunction() and used to create corresponding cleanup calls in
				44	/// ModuleDtorFunction()
				45	llvm::SmallVector<llvm::GlobalVariable *, 16> GpuBinaryHandles;
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	46
				47	llvm::Constant *getSetupArgumentFn() const;
				48	llvm::Constant *getLaunchFn() const;
				49
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	50	/// Creates a function to register all kernel stubs generated in this module.
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	51	llvm::Function *makeRegisterGlobalsFn();
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	52
				53	/// Helper function that generates a constant string and returns a pointer to
				54	/// the start of the string. The result of this function can be used anywhere
				55	/// where the C code specifies const char*.
				56	llvm::Constant *makeConstantString(const std::string &Str,
				57	const std::string &Name = "",
				58	unsigned Alignment = 0) {
				59	llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
				60	llvm::ConstantInt::get(SizeTy, 0)};
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	61	auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
				62	return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
				63	ConstStr.getPointer(), Zeros);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	64	}
				65
				66	void emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
				67
Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	68	public:
				69	CGNVCUDARuntime(CodeGenModule &CGM);
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	70
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	71	void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	72	void registerDeviceVar(llvm::GlobalVariable &Var, unsigned Flags) override {
				73	DeviceVars.push_back(std::make_pair(&Var, Flags));
				74	}
				75
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	76	/// Creates module constructor function
				77	llvm::Function *makeModuleCtorFunction() override;
				78	/// Creates module destructor function
				79	llvm::Function *makeModuleDtorFunction() override;
Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	80	};
				81
Alexander Kornienko	ab9db51	2015-06-22 23:07:51 +0000	[diff] [blame]	82	}
Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	83
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	84	CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
				85	: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
				86	TheModule(CGM.getModule()) {
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	87	CodeGen::CodeGenTypes &Types = CGM.getTypes();
				88	ASTContext &Ctx = CGM.getContext();
				89
				90	IntTy = Types.ConvertType(Ctx.IntTy);
				91	SizeTy = Types.ConvertType(Ctx.getSizeType());
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	92	VoidTy = llvm::Type::getVoidTy(Context);
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	93
				94	CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
				95	VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	96	VoidPtrPtrTy = VoidPtrTy->getPointerTo();
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	97	}
				98
				99	llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
				100	// cudaError_t cudaSetupArgument(void *, size_t, size_t)
				101	std::vector<llvm::Type*> Params;
				102	Params.push_back(VoidPtrTy);
				103	Params.push_back(SizeTy);
				104	Params.push_back(SizeTy);
				105	return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
				106	Params, false),
				107	"cudaSetupArgument");
				108	}
				109
				110	llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
				111	// cudaError_t cudaLaunch(char *)
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	112	return CGM.CreateRuntimeFunction(
				113	llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	114	}
				115
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	116	void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
				117	FunctionArgList &Args) {
				118	EmittedKernels.push_back(CGF.CurFn);
				119	emitDeviceStubBody(CGF, Args);
				120	}
				121
				122	void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	123	FunctionArgList &Args) {
				124	// Build the argument value list and the argument stack struct type.
Dmitri Gribenko	f857950	2013-01-12 19:30:44 +0000	[diff] [blame]	125	SmallVector<llvm::Value *, 16> ArgValues;
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	126	std::vector<llvm::Type *> ArgTypes;
				127	for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
				128	I != E; ++I) {
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	129	llvm::Value V = CGF.GetAddrOfLocalVar(I).getPointer();
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	130	ArgValues.push_back(V);
				131	assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
				132	ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
				133	}
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	134	llvm::StructType *ArgStackTy = llvm::StructType::get(Context, ArgTypes);
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	135
				136	llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
				137
				138	// Emit the calls to cudaSetupArgument
				139	llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
				140	for (unsigned I = 0, E = Args.size(); I != E; ++I) {
				141	llvm::Value *Args[3];
				142	llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
				143	Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
				144	Args[1] = CGF.Builder.CreateIntCast(
				145	llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
				146	SizeTy, false);
				147	Args[2] = CGF.Builder.CreateIntCast(
				148	llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
				149	SizeTy, false);
John McCall	882987f	2013-02-28 19:01:20 +0000	[diff] [blame]	150	llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	151	llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
				152	llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
				153	CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
				154	CGF.EmitBlock(NextBlock);
				155	}
				156
				157	// Emit the call to cudaLaunch
				158	llvm::Constant *cudaLaunchFn = getLaunchFn();
				159	llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
John McCall	882987f	2013-02-28 19:01:20 +0000	[diff] [blame]	160	CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
Peter Collingbourne	fa4d603	2011-10-06 18:51:56 +0000	[diff] [blame]	161	CGF.EmitBranch(EndBlock);
				162
				163	CGF.EmitBlock(EndBlock);
Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	164	}
				165
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	166	/// Creates a function that sets up state on the host side for CUDA objects that
				167	/// have a presence on both the host and device sides. Specifically, registers
				168	/// the host side of kernel functions and device global variables with the CUDA
				169	/// runtime.
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	170	/// \code
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	171	/// void __cuda_register_globals(void** GpuBinaryHandle) {
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	172	/// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
				173	/// ...
				174	/// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	175	/// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
				176	/// ...
				177	/// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	178	/// }
				179	/// \endcode
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	180	llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
Artem Belevich	8c1ec1e	2016-03-02 18:28:53 +0000	[diff] [blame]	181	// No need to register anything
				182	if (EmittedKernels.empty() && DeviceVars.empty())
				183	return nullptr;
				184
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	185	llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
				186	llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	187	llvm::GlobalValue::InternalLinkage, "__cuda_register_globals", &TheModule);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	188	llvm::BasicBlock *EntryBB =
				189	llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	190	CGBuilderTy Builder(CGM, Context);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	191	Builder.SetInsertPoint(EntryBB);
				192
				193	// void __cudaRegisterFunction(void *, const char , char , const char ,
				194	// int, uint3, uint3, dim3, dim3, int*)
				195	std::vector<llvm::Type *> RegisterFuncParams = {
				196	VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
				197	VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
				198	llvm::Constant *RegisterFunc = CGM.CreateRuntimeFunction(
				199	llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
				200	"__cudaRegisterFunction");
				201
				202	// Extract GpuBinaryHandle passed as the first argument passed to
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	203	// __cuda_register_globals() and generate __cudaRegisterFunction() call for
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	204	// each emitted kernel.
				205	llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
				206	for (llvm::Function *Kernel : EmittedKernels) {
				207	llvm::Constant *KernelName = makeConstantString(Kernel->getName());
				208	llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	209	llvm::Value *Args[] = {
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	210	&GpuBinaryHandlePtr, Builder.CreateBitCast(Kernel, VoidPtrTy),
				211	KernelName, KernelName, llvm::ConstantInt::get(IntTy, -1), NullPtr,
				212	NullPtr, NullPtr, NullPtr,
				213	llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	214	Builder.CreateCall(RegisterFunc, Args);
				215	}
				216
				217	// void __cudaRegisterVar(void *, char , char , const char ,
				218	// int, int, int, int)
				219	std::vector<llvm::Type *> RegisterVarParams = {
				220	VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy,
				221	IntTy, IntTy, IntTy, IntTy};
				222	llvm::Constant *RegisterVar = CGM.CreateRuntimeFunction(
				223	llvm::FunctionType::get(IntTy, RegisterVarParams, false),
				224	"__cudaRegisterVar");
				225	for (auto &Pair : DeviceVars) {
				226	llvm::GlobalVariable *Var = Pair.first;
				227	unsigned Flags = Pair.second;
				228	llvm::Constant *VarName = makeConstantString(Var->getName());
				229	uint64_t VarSize =
				230	CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
				231	llvm::Value *Args[] = {
				232	&GpuBinaryHandlePtr,
				233	Builder.CreateBitCast(Var, VoidPtrTy),
				234	VarName,
				235	VarName,
				236	llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),
				237	llvm::ConstantInt::get(IntTy, VarSize),
				238	llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),
				239	llvm::ConstantInt::get(IntTy, 0)};
				240	Builder.CreateCall(RegisterVar, Args);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	241	}
				242
				243	Builder.CreateRetVoid();
				244	return RegisterKernelsFunc;
				245	}
				246
				247	/// Creates a global constructor function for the module:
				248	/// \code
				249	/// void __cuda_module_ctor(void*) {
				250	/// Handle0 = __cudaRegisterFatBinary(GpuBinaryBlob0);
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	251	/// __cuda_register_globals(Handle0);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	252	/// ...
				253	/// HandleN = __cudaRegisterFatBinary(GpuBinaryBlobN);
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	254	/// __cuda_register_globals(HandleN);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	255	/// }
				256	/// \endcode
				257	llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
Artem Belevich	8c1ec1e	2016-03-02 18:28:53 +0000	[diff] [blame]	258	// No need to generate ctors/dtors if there are no GPU binaries.
				259	if (CGM.getCodeGenOpts().CudaGpuBinaryFileNames.empty())
				260	return nullptr;
				261
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	262	// void __cuda_register_globals(void* handle);
				263	llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	264	// void ** __cudaRegisterFatBinary(void *);
				265	llvm::Constant *RegisterFatbinFunc = CGM.CreateRuntimeFunction(
				266	llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
				267	"__cudaRegisterFatBinary");
				268	// struct { int magic, int version, void * gpu_binary, void * dont_care };
				269	llvm::StructType *FatbinWrapperTy =
				270	llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy, nullptr);
				271
				272	llvm::Function *ModuleCtorFunc = llvm::Function::Create(
				273	llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
				274	llvm::GlobalValue::InternalLinkage, "__cuda_module_ctor", &TheModule);
				275	llvm::BasicBlock *CtorEntryBB =
				276	llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	277	CGBuilderTy CtorBuilder(CGM, Context);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	278
				279	CtorBuilder.SetInsertPoint(CtorEntryBB);
				280
				281	// For each GPU binary, register it with the CUDA runtime and store returned
				282	// handle in a global variable and save the handle in GpuBinaryHandles vector
				283	// to be cleaned up in destructor on exit. Then associate all known kernels
				284	// with the GPU binary handle so CUDA runtime can figure out what to call on
				285	// the GPU side.
				286	for (const std::string &GpuBinaryFileName :
				287	CGM.getCodeGenOpts().CudaGpuBinaryFileNames) {
				288	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GpuBinaryOrErr =
				289	llvm::MemoryBuffer::getFileOrSTDIN(GpuBinaryFileName);
				290	if (std::error_code EC = GpuBinaryOrErr.getError()) {
				291	CGM.getDiags().Report(diag::err_cannot_open_file) << GpuBinaryFileName
				292	<< EC.message();
				293	continue;
				294	}
				295
				296	// Create initialized wrapper structure that points to the loaded GPU binary
				297	llvm::Constant *Values[] = {
				298	llvm::ConstantInt::get(IntTy, 0x466243b1), // Fatbin wrapper magic.
				299	llvm::ConstantInt::get(IntTy, 1), // Fatbin version.
				300	makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "", 16), // Data.
				301	llvm::ConstantPointerNull::get(VoidPtrTy)}; // Unused in fatbin v1.
				302	llvm::GlobalVariable *FatbinWrapper = new llvm::GlobalVariable(
				303	TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage,
				304	llvm::ConstantStruct::get(FatbinWrapperTy, Values),
				305	"__cuda_fatbin_wrapper");
Justin Lebar	21e5d4f	2016-01-14 21:41:27 +0000	[diff] [blame]	306	// NVIDIA's cuobjdump looks for fatbins in this section.
				307	FatbinWrapper->setSection(".nvFatBinSegment");
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	308
				309	// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
				310	llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
				311	RegisterFatbinFunc,
				312	CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
				313	llvm::GlobalVariable *GpuBinaryHandle = new llvm::GlobalVariable(
				314	TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
				315	llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	316	CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
				317	CGM.getPointerAlign());
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	318
Artem Belevich	42e1949	2016-03-02 18:28:50 +0000	[diff] [blame]	319	// Call __cuda_register_globals(GpuBinaryHandle);
Artem Belevich	8c1ec1e	2016-03-02 18:28:53 +0000	[diff] [blame]	320	if (RegisterGlobalsFunc)
				321	CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	322
				323	// Save GpuBinaryHandle so we can unregister it in destructor.
				324	GpuBinaryHandles.push_back(GpuBinaryHandle);
				325	}
				326
				327	CtorBuilder.CreateRetVoid();
				328	return ModuleCtorFunc;
				329	}
				330
				331	/// Creates a global destructor function that unregisters all GPU code blobs
				332	/// registered by constructor.
				333	/// \code
				334	/// void __cuda_module_dtor(void*) {
				335	/// __cudaUnregisterFatBinary(Handle0);
				336	/// ...
				337	/// __cudaUnregisterFatBinary(HandleN);
				338	/// }
				339	/// \endcode
				340	llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
Artem Belevich	8c1ec1e	2016-03-02 18:28:53 +0000	[diff] [blame]	341	// No need for destructor if we don't have handles to unregister.
				342	if (GpuBinaryHandles.empty())
				343	return nullptr;
				344
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	345	// void __cudaUnregisterFatBinary(void ** handle);
				346	llvm::Constant *UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
				347	llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
				348	"__cudaUnregisterFatBinary");
				349
				350	llvm::Function *ModuleDtorFunc = llvm::Function::Create(
				351	llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
				352	llvm::GlobalValue::InternalLinkage, "__cuda_module_dtor", &TheModule);
				353	llvm::BasicBlock *DtorEntryBB =
				354	llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	355	CGBuilderTy DtorBuilder(CGM, Context);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	356	DtorBuilder.SetInsertPoint(DtorEntryBB);
				357
				358	for (llvm::GlobalVariable *GpuBinaryHandle : GpuBinaryHandles) {
John McCall	7f416cc	2015-09-08 08:05:57 +0000	[diff] [blame]	359	auto HandleValue =
				360	DtorBuilder.CreateAlignedLoad(GpuBinaryHandle, CGM.getPointerAlign());
				361	DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
Artem Belevich	52cc487	2015-05-07 19:34:16 +0000	[diff] [blame]	362	}
				363
				364	DtorBuilder.CreateRetVoid();
				365	return ModuleDtorFunc;
				366	}
				367
Peter Collingbourne	fe88342	2011-10-06 18:29:37 +0000	[diff] [blame]	368	CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
				369	return new CGNVCUDARuntime(CGM);
				370	}