[OpenCL] Do not use vararg in emitted functions for enqueue_kernel
Not all targets (e.g. amdgpu) support variadic functions. Instead of passing the size arguments as varargs to the emitted functions for enqueue_kernel,
this patch creates a temporary array of size_t, stores the size arguments in that array,
and passes a pointer to its first element to the emitted functions for enqueue_kernel, which are now non-variadic.
Differential Revision: https://reviews.llvm.org/D36678
llvm-svn: 312441
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e6ae74a..1923c58 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2601,27 +2601,50 @@
}
assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
+ // Create a temporary array to hold the sizes of local pointer arguments
+ // for the block. \p First is the position of the first size argument.
+ auto CreateArrayForSizeVar = [=](unsigned First) {
+ auto *AT = llvm::ArrayType::get(SizeTy, NumArgs - First);
+ auto *Arr = Builder.CreateAlloca(AT);
+ llvm::Value *Ptr;
+ // Each of the following arguments specifies the size of the corresponding
+ // argument passed to the enqueued block.
+ auto *Zero = llvm::ConstantInt::get(IntTy, 0);
+ for (unsigned I = First; I < NumArgs; ++I) {
+ auto *Index = llvm::ConstantInt::get(IntTy, I - First);
+ auto *GEP = Builder.CreateGEP(Arr, {Zero, Index});
+ if (I == First)
+ Ptr = GEP;
+ auto *V =
+ Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
+ Builder.CreateAlignedStore(
+ V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
+ }
+ return Ptr;
+ };
+
// Could have events and/or vaargs.
if (E->getArg(3)->getType()->isBlockPointerType()) {
// No events passed, but has variadic arguments.
Name = "__enqueue_kernel_vaargs";
- llvm::Value *Block = Builder.CreatePointerCast(
- EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
+ auto *Block = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(3)),
+ GenericVoidPtrTy);
+ auto *PtrToSizeArray = CreateArrayForSizeVar(4);
+
// Create a vector of the arguments, as well as a constant value to
// express to the runtime the number of variadic arguments.
- std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
- ConstantInt::get(IntTy, NumArgs - 4)};
- std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
- GenericVoidPtrTy, IntTy};
-
- // Each of the following arguments specifies the size of the corresponding
- // argument passed to the enqueued block.
- for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
- Args.push_back(
- Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+ std::vector<llvm::Value *> Args = {Queue,
+ Flags,
+ Range,
+ Block,
+ ConstantInt::get(IntTy, NumArgs - 4),
+ PtrToSizeArray};
+ std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy,
+ RangeTy, GenericVoidPtrTy,
+ IntTy, PtrToSizeArray->getType()};
llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
return RValue::get(
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
llvm::ArrayRef<llvm::Value *>(Args)));
@@ -2667,14 +2690,12 @@
ArgTys.push_back(Int32Ty);
Name = "__enqueue_kernel_events_vaargs";
- // Each of the following arguments specifies the size of the corresponding
- // argument passed to the enqueued block.
- for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
- Args.push_back(
- Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
+ auto *PtrToSizeArray = CreateArrayForSizeVar(7);
+ Args.push_back(PtrToSizeArray);
+ ArgTys.push_back(PtrToSizeArray->getType());
llvm::FunctionType *FTy = llvm::FunctionType::get(
- Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
+ Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
return RValue::get(
Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
llvm::ArrayRef<llvm::Value *>(Args)));