[CUDA] add support for the new kernel launch API in CUDA-9.2+.
Instead of calling CUDA runtime to arrange function arguments,
the new API constructs arguments in a local array and the kernels
are launched with __cudaLaunchKernel().
The old API has been deprecated and is expected to go away
in the next CUDA release.
Differential Revision: https://reviews.llvm.org/D57488
llvm-svn: 352799
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index ec926ea..43cc14d 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -13,6 +13,7 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
@@ -41,9 +42,8 @@
SourceLocation GGGLoc) {
FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
if (!ConfigDecl)
- return ExprError(
- Diag(LLLLoc, diag::err_undeclared_var_use)
- << (getLangOpts().HIP ? "hipConfigureCall" : "cudaConfigureCall"));
+ return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
+ << getCudaConfigureFuncName());
QualType ConfigQTy = ConfigDecl->getType();
DeclRefExpr *ConfigDR = new (Context)
@@ -957,3 +957,16 @@
copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
}
+
+std::string Sema::getCudaConfigureFuncName() const {
+ if (getLangOpts().HIP)
+ return "hipConfigureCall";
+
+ // New CUDA kernel launch sequence.
+ if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
+ CudaFeature::CUDA_USES_NEW_LAUNCH))
+ return "__cudaPushCallConfiguration";
+
+ // Legacy CUDA kernel configuration call
+ return "cudaConfigureCall";
+}