[CUDA] add support for the new kernel launch API in CUDA-9.2+.

Instead of calling CUDA runtime to arrange function arguments,
the new API constructs arguments in a local array and the kernels
are launched with __cudaLaunchKernel().

The old API has been deprecated and is expected to go away
in the next CUDA release.

Differential Revision: https://reviews.llvm.org/D57488

llvm-svn: 352799
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index ec926ea..43cc14d 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -13,6 +13,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/Basic/Cuda.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Sema.h"
@@ -41,9 +42,8 @@
                                          SourceLocation GGGLoc) {
   FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
   if (!ConfigDecl)
-    return ExprError(
-        Diag(LLLLoc, diag::err_undeclared_var_use)
-        << (getLangOpts().HIP ? "hipConfigureCall" : "cudaConfigureCall"));
+    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
+                     << getCudaConfigureFuncName());
   QualType ConfigQTy = ConfigDecl->getType();
 
   DeclRefExpr *ConfigDR = new (Context)
@@ -957,3 +957,16 @@
   copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
   copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
 }
+
+std::string Sema::getCudaConfigureFuncName() const {
+  if (getLangOpts().HIP)
+    return "hipConfigureCall";
+
+  // New CUDA kernel launch sequence.
+  if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(),
+                         CudaFeature::CUDA_USES_NEW_LAUNCH))
+    return "__cudaPushCallConfiguration";
+
+  // Legacy CUDA kernel configuration call
+  return "cudaConfigureCall";
+}