blob: 88a0bdc821d7ccf52d2e2eb5d2f7d4a3fccafc12 [file] [log] [blame]
//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//
14
15#include "CGCUDARuntime.h"
Peter Collingbournea4ae2292011-10-06 18:51:56 +000016#include "CodeGenFunction.h"
17#include "CodeGenModule.h"
18#include "clang/AST/Decl.h"
19#include "llvm/BasicBlock.h"
20#include "llvm/Constants.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/Support/CallSite.h"
23
24#include <vector>
Peter Collingbourne6c0aa5f2011-10-06 18:29:37 +000025
26using namespace clang;
27using namespace CodeGen;
28
29namespace {
30
31class CGNVCUDARuntime : public CGCUDARuntime {
Peter Collingbournea4ae2292011-10-06 18:51:56 +000032
33private:
34 llvm::Type *IntTy, *SizeTy;
35 llvm::PointerType *CharPtrTy, *VoidPtrTy;
36
37 llvm::Constant *getSetupArgumentFn() const;
38 llvm::Constant *getLaunchFn() const;
39
Peter Collingbourne6c0aa5f2011-10-06 18:29:37 +000040public:
41 CGNVCUDARuntime(CodeGenModule &CGM);
Peter Collingbournea4ae2292011-10-06 18:51:56 +000042
43 void EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args);
Peter Collingbourne6c0aa5f2011-10-06 18:29:37 +000044};
45
46}
47
48CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) : CGCUDARuntime(CGM) {
Peter Collingbournea4ae2292011-10-06 18:51:56 +000049 CodeGen::CodeGenTypes &Types = CGM.getTypes();
50 ASTContext &Ctx = CGM.getContext();
51
52 IntTy = Types.ConvertType(Ctx.IntTy);
53 SizeTy = Types.ConvertType(Ctx.getSizeType());
54
55 CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
56 VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
57}
58
59llvm::Constant *CGNVCUDARuntime::getSetupArgumentFn() const {
60 // cudaError_t cudaSetupArgument(void *, size_t, size_t)
61 std::vector<llvm::Type*> Params;
62 Params.push_back(VoidPtrTy);
63 Params.push_back(SizeTy);
64 Params.push_back(SizeTy);
65 return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
66 Params, false),
67 "cudaSetupArgument");
68}
69
70llvm::Constant *CGNVCUDARuntime::getLaunchFn() const {
71 // cudaError_t cudaLaunch(char *)
72 std::vector<llvm::Type*> Params;
73 Params.push_back(CharPtrTy);
74 return CGM.CreateRuntimeFunction(llvm::FunctionType::get(IntTy,
75 Params, false),
76 "cudaLaunch");
77}
78
79void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF,
80 FunctionArgList &Args) {
81 // Build the argument value list and the argument stack struct type.
82 llvm::SmallVector<llvm::Value *, 16> ArgValues;
83 std::vector<llvm::Type *> ArgTypes;
84 for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
85 I != E; ++I) {
86 llvm::Value *V = CGF.GetAddrOfLocalVar(*I);
87 ArgValues.push_back(V);
88 assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
89 ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
90 }
91 llvm::StructType *ArgStackTy = llvm::StructType::get(
92 CGF.getLLVMContext(), ArgTypes);
93
94 llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
95
96 // Emit the calls to cudaSetupArgument
97 llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
98 for (unsigned I = 0, E = Args.size(); I != E; ++I) {
99 llvm::Value *Args[3];
100 llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
101 Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
102 Args[1] = CGF.Builder.CreateIntCast(
103 llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
104 SizeTy, false);
105 Args[2] = CGF.Builder.CreateIntCast(
106 llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
107 SizeTy, false);
108 llvm::CallSite CS = CGF.EmitCallOrInvoke(cudaSetupArgFn, Args);
109 llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
110 llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
111 CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
112 CGF.EmitBlock(NextBlock);
113 }
114
115 // Emit the call to cudaLaunch
116 llvm::Constant *cudaLaunchFn = getLaunchFn();
117 llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy);
118 CGF.EmitCallOrInvoke(cudaLaunchFn, Arg);
119 CGF.EmitBranch(EndBlock);
120
121 CGF.EmitBlock(EndBlock);
Peter Collingbourne6c0aa5f2011-10-06 18:29:37 +0000122}
123
124CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
125 return new CGNVCUDARuntime(CGM);
126}