[OpenCL] Emit enqueued block as kernel
In OpenCL the kernel function and non-kernel function has different calling conventions.
For certain targets they have different argument ABIs. Also kernels have special function
attributes and metadata for runtime to launch them.
The blocks passed to enqueue_kernel is supposed to be executed as kernels. As such,
the block invoke function should be emitted as kernel with proper calling convention and
argument ABI.
This patch emits enqueued block as kernel. If a block is both called directly and passed
to enqueue_kernel, separate functions will be generated.
Differential Revision: https://reviews.llvm.org/D38134
llvm-svn: 315804
diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp
index c4c5541..d15c66d 100644
--- a/clang/lib/CodeGen/CGBlocks.cpp
+++ b/clang/lib/CodeGen/CGBlocks.cpp
@@ -738,16 +738,27 @@
}
/// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
+ llvm::Function **InvokeF) {
// If the block has no captures, we won't have a pre-computed
// layout for it.
if (!blockExpr->getBlockDecl()->hasCaptures()) {
- if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr))
+ // The block literal is emitted as a global variable, and the block invoke
+ // function has to be extracted from its initializer.
+ if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
+ if (InvokeF) {
+ auto *GV = cast<llvm::GlobalVariable>(
+ cast<llvm::Constant>(Block)->stripPointerCasts());
+ auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
+ *InvokeF = cast<llvm::Function>(
+ BlockInit->getAggregateElement(2)->stripPointerCasts());
+ }
return Block;
+ }
CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
computeBlockInfo(CGM, this, blockInfo);
blockInfo.BlockExpression = blockExpr;
- return EmitBlockLiteral(blockInfo);
+ return EmitBlockLiteral(blockInfo, InvokeF);
}
// Find the block info for this block and take ownership of it.
@@ -756,10 +767,11 @@
blockExpr->getBlockDecl()));
blockInfo->BlockExpression = blockExpr;
- return EmitBlockLiteral(*blockInfo);
+ return EmitBlockLiteral(*blockInfo, InvokeF);
}
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
+ llvm::Function **InvokeF) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
auto GenVoidPtrTy =
IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
@@ -768,9 +780,11 @@
CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
- llvm::Constant *blockFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
+ auto *InvokeFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
- blockFn = llvm::ConstantExpr::getPointerCast(blockFn, GenVoidPtrTy);
+ if (InvokeF)
+ *InvokeF = InvokeFn;
+ auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e8d2c59..1de7c6e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2779,12 +2779,17 @@
// The most basic form of the call with parameters:
// queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
Name = "__enqueue_kernel_basic";
- llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
+ llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
+ GenericVoidPtrTy};
llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
- llvm::Value *Block = Builder.CreatePointerCast(
- EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
+ llvm::Value *Kernel =
+ Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ llvm::Value *Block =
+ Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
AttrBuilder B;
B.addAttribute(Attribute::ByVal);
@@ -2793,7 +2798,7 @@
auto RTCall =
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
- {Queue, Flags, Range, Block});
+ {Queue, Flags, Range, Kernel, Block});
RTCall->setAttributes(ByValAttrSet);
return RValue::get(RTCall);
}
@@ -2825,21 +2830,23 @@
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
Name = "__enqueue_kernel_vaargs";
- auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)),
- GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
+ llvm::Value *Kernel =
+ Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
auto *PtrToSizeArray = CreateArrayForSizeVar(4);
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
- std::vector<llvm::Value *> Args = {Queue,
- Flags,
- Range,
- Block,
- ConstantInt::get(IntTy, NumArgs - 4),
- PtrToSizeArray};
- std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy,
- RangeTy, GenericVoidPtrTy,
- IntTy, PtrToSizeArray->getType()};
+ std::vector<llvm::Value *> Args = {
+ Queue, Flags, Range,
+ Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
+ PtrToSizeArray};
+ std::vector<llvm::Type *> ArgTys = {
+ QueueTy, IntTy, RangeTy,
+ GenericVoidPtrTy, GenericVoidPtrTy, IntTy,
+ PtrToSizeArray->getType()};
llvm::FunctionType *FTy = llvm::FunctionType::get(
Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
@@ -2863,15 +2870,19 @@
// Convert to generic address space.
EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
- llvm::Value *Block = Builder.CreatePointerCast(
- EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
+ llvm::Value *Kernel =
+ Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ llvm::Value *Block =
+ Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
std::vector<llvm::Type *> ArgTys = {
- QueueTy, Int32Ty, RangeTy, Int32Ty,
- EventPtrTy, EventPtrTy, GenericVoidPtrTy};
+ QueueTy, Int32Ty, RangeTy, Int32Ty,
+ EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
- std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
- EventList, ClkEvent, Block};
+ std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
+ EventList, ClkEvent, Kernel, Block};
if (NumArgs == 7) {
// Has events but no variadics.
@@ -2905,24 +2916,30 @@
case Builtin::BIget_kernel_work_group_size: {
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
- Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
+ Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+ llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
+ false),
"__get_kernel_work_group_size_impl"),
- Arg));
+ {Kernel, Arg}));
}
case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
getContext().getTargetAddressSpace(LangAS::opencl_generic));
- Value *Arg = EmitScalarExpr(E->getArg(0));
- Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
+ Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
- llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
+ llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
+ false),
"__get_kernel_preferred_work_group_multiple_impl"),
- Arg));
+ {Kernel, Arg}));
}
case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
@@ -2930,8 +2947,10 @@
getContext().getTargetAddressSpace(LangAS::opencl_generic));
LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
- Value *Block = EmitScalarExpr(E->getArg(1));
- Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
+ auto Info =
+ CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
+ Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
+ Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
const char *Name =
BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
? "__get_kernel_max_sub_group_size_for_ndrange_impl"
@@ -2939,9 +2958,10 @@
return RValue::get(Builder.CreateCall(
CGM.CreateRuntimeFunction(
llvm::FunctionType::get(
- IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
+ IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
+ false),
Name),
- {NDRange, Block}));
+ {NDRange, Kernel, Block}));
}
case Builtin::BI__builtin_store_half:
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.cpp b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
index 49eab45..4d63761 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.cpp
@@ -16,6 +16,7 @@
#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include <assert.h>
@@ -110,3 +111,38 @@
CGM.getLLVMContext(),
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
+
+CGOpenCLRuntime::EnqueuedBlockInfo
+CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
+ // The block literal may be assigned to a const variable. Chasing down
+ // to get the block literal.
+ if (auto DR = dyn_cast<DeclRefExpr>(E)) {
+ E = cast<VarDecl>(DR->getDecl())->getInit();
+ }
+ if (auto Cast = dyn_cast<CastExpr>(E)) {
+ E = Cast->getSubExpr();
+ }
+ auto *Block = cast<BlockExpr>(E);
+
+ // The same block literal may be enqueued multiple times. Cache it if
+ // possible.
+ auto Loc = EnqueuedBlockMap.find(Block);
+ if (Loc != EnqueuedBlockMap.end()) {
+ return Loc->second;
+ }
+
+ // Emit block literal as a common block expression and get the block invoke
+ // function.
+ llvm::Function *Invoke;
+ auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke);
+ auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel(
+ CGF, Invoke, V->stripPointerCasts());
+
+ // The common part of the post-processing of the kernel goes here.
+ F->addFnAttr(llvm::Attribute::NoUnwind);
+ F->setCallingConv(
+ CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
+ EnqueuedBlockInfo Info{F, V};
+ EnqueuedBlockMap[Block] = Info;
+ return Info;
+}
diff --git a/clang/lib/CodeGen/CGOpenCLRuntime.h b/clang/lib/CodeGen/CGOpenCLRuntime.h
index 2df28c2..ead303d 100644
--- a/clang/lib/CodeGen/CGOpenCLRuntime.h
+++ b/clang/lib/CodeGen/CGOpenCLRuntime.h
@@ -17,11 +17,13 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOPENCLRUNTIME_H
#include "clang/AST/Type.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
namespace clang {
+class Expr;
class VarDecl;
namespace CodeGen {
@@ -35,6 +37,14 @@
llvm::Type *PipeTy;
llvm::PointerType *SamplerTy;
+ /// Structure for enqueued block information.
+ struct EnqueuedBlockInfo {
+ llvm::Function *Kernel; /// Enqueued block kernel.
+ llvm::Value *BlockArg; /// The first argument to enqueued block kernel.
+ };
+ /// Maps block expression to block information.
+ llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
+
public:
CGOpenCLRuntime(CodeGenModule &CGM) : CGM(CGM), PipeTy(nullptr),
SamplerTy(nullptr) {}
@@ -62,6 +72,10 @@
/// \return __generic void* type.
llvm::PointerType *getGenericVoidPointerType();
+
+ /// \return enqueued block information for enqueued block.
+ EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
+ const Expr *E);
};
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 39b3a93ee..79674a1 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1584,7 +1584,14 @@
// Block Bits
//===--------------------------------------------------------------------===//
- llvm::Value *EmitBlockLiteral(const BlockExpr *);
+ /// Emit block literal.
+ /// \return an LLVM value which is a pointer to a struct which contains
+ /// information about the block, including the block invoke function, the
+ /// captured variables, etc.
+ /// \param InvokeF will contain the block invoke function if it is not
+ /// nullptr.
+ llvm::Value *EmitBlockLiteral(const BlockExpr *,
+ llvm::Function **InvokeF = nullptr);
static void destroyBlockInfos(CGBlockInfo *info);
llvm::Function *GenerateBlockFunction(GlobalDecl GD,
@@ -2914,8 +2921,11 @@
LValue EmitOMPSharedLValue(const Expr *E);
private:
- /// Helpers for blocks
- llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
+ /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
+ /// nullptr. It should be called without \p InvokeF if the caller does not
+ /// need invoke function to be returned.
+ llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
+ llvm::Function **InvokeF = nullptr);
/// Helpers for the OpenMP loop directives.
void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h
index 9d0e3de..d082342 100644
--- a/clang/lib/CodeGen/CodeGenTypes.h
+++ b/clang/lib/CodeGen/CodeGenTypes.h
@@ -164,8 +164,6 @@
llvm::SmallSet<const Type *, 8> RecordsWithOpaqueMemberPointers;
- unsigned ClangCallConvToLLVMCallConv(CallingConv CC);
-
public:
CodeGenTypes(CodeGenModule &cgm);
~CodeGenTypes();
@@ -180,6 +178,9 @@
llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); }
const CodeGenOptions &getCodeGenOpts() const;
+ /// Convert clang calling convention to LLVM callilng convention.
+ unsigned ClangCallConvToLLVMCallConv(CallingConv CC);
+
/// ConvertType - Convert type T into a llvm::Type.
llvm::Type *ConvertType(QualType T);
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 934e155..bca2780 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -14,6 +14,7 @@
#include "TargetInfo.h"
#include "ABIInfo.h"
+#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
@@ -7617,6 +7618,10 @@
const VarDecl *D) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
llvm::LLVMContext &C) const override;
+ llvm::Function *
+ createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *BlockInvokeFunc,
+ llvm::Value *BlockLiteral) const override;
};
}
@@ -8917,3 +8922,109 @@
return SetCGInfo(new SPIRTargetCodeGenInfo(Types));
}
}
+
+/// Create an OpenCL kernel for an enqueued block.
+///
+/// The kernel has the same function type as the block invoke function. Its
+/// name is the name of the block invoke function postfixed with "_kernel".
+/// It simply calls the block invoke function then returns.
+llvm::Function *
+TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *Invoke,
+ llvm::Value *BlockLiteral) const {
+ auto *InvokeFT = Invoke->getFunctionType();
+ llvm::SmallVector<llvm::Type *, 2> ArgTys;
+ for (auto &P : InvokeFT->params())
+ ArgTys.push_back(P);
+ auto &C = CGF.getLLVMContext();
+ std::string Name = Invoke->getName().str() + "_kernel";
+ auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
+ auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
+ &CGF.CGM.getModule());
+ auto IP = CGF.Builder.saveIP();
+ auto *BB = llvm::BasicBlock::Create(C, "entry", F);
+ auto &Builder = CGF.Builder;
+ Builder.SetInsertPoint(BB);
+ llvm::SmallVector<llvm::Value *, 2> Args;
+ for (auto &A : F->args())
+ Args.push_back(&A);
+ Builder.CreateCall(Invoke, Args);
+ Builder.CreateRetVoid();
+ Builder.restoreIP(IP);
+ return F;
+}
+
+/// Create an OpenCL kernel for an enqueued block.
+///
+/// The type of the first argument (the block literal) is the struct type
+/// of the block literal instead of a pointer type. The first argument
+/// (block literal) is passed directly by value to the kernel. The kernel
+/// allocates the same type of struct on stack and stores the block literal
+/// to it and passes its pointer to the block invoke function. The kernel
+/// has "enqueued-block" function attribute and kernel argument metadata.
+llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
+ CodeGenFunction &CGF, llvm::Function *Invoke,
+ llvm::Value *BlockLiteral) const {
+ auto &Builder = CGF.Builder;
+ auto &C = CGF.getLLVMContext();
+
+ auto *BlockTy = BlockLiteral->getType()->getPointerElementType();
+ auto *InvokeFT = Invoke->getFunctionType();
+ llvm::SmallVector<llvm::Type *, 2> ArgTys;
+ llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
+ llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
+
+ ArgTys.push_back(BlockTy);
+ ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
+ AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
+ ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
+ ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
+ AccessQuals.push_back(llvm::MDString::get(C, "none"));
+ ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
+ for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
+ ArgTys.push_back(InvokeFT->getParamType(I));
+ ArgTys.push_back(BlockTy);
+ ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
+ AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
+ AccessQuals.push_back(llvm::MDString::get(C, "none"));
+ ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
+ ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
+ ArgNames.push_back(
+ llvm::MDString::get(C, std::string("local_arg") + std::to_string(I)));
+ }
+ std::string Name = Invoke->getName().str() + "_kernel";
+ auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
+ auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
+ &CGF.CGM.getModule());
+ F->addFnAttr("enqueued-block");
+ auto IP = CGF.Builder.saveIP();
+ auto *BB = llvm::BasicBlock::Create(C, "entry", F);
+ Builder.SetInsertPoint(BB);
+ unsigned BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(BlockTy);
+ auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
+ BlockPtr->setAlignment(BlockAlign);
+ Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
+ auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
+ llvm::SmallVector<llvm::Value *, 2> Args;
+ Args.push_back(Cast);
+ for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
+ Args.push_back(I);
+ Builder.CreateCall(Invoke, Args);
+ Builder.CreateRetVoid();
+ Builder.restoreIP(IP);
+
+ F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
+ F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
+ F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
+ F->setMetadata("kernel_arg_base_type",
+ llvm::MDNode::get(C, ArgBaseTypeNames));
+ F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
+ if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
+ F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
+
+ return F;
+}
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index c9e5a40..7b91347 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -287,6 +287,16 @@
virtual TargetOpenCLBlockHelper *getTargetOpenCLBlockHelper() const {
return nullptr;
}
+
+ /// Create an OpenCL kernel for an enqueued block. The kernel function is
+ /// a wrapper for the block invoke function with target-specific calling
+ /// convention and ABI as an OpenCL kernel. The wrapper function accepts
+ /// block context and block arguments in target-specific way and calls
+ /// the original block invoke function.
+ virtual llvm::Function *
+ createEnqueuedBlockKernel(CodeGenFunction &CGF,
+ llvm::Function *BlockInvokeFunc,
+ llvm::Value *BlockLiteral) const;
};
} // namespace CodeGen