[NVPTX, CUDA] Added support for m8n32k16 and m32n8k16 variants of wmma instructions. The new instructions were added for sm_70+ GPUs in CUDA-9.1. Differential Revision: https://reviews.llvm.org/D45068 llvm-svn: 330296

commit: 0ae8590354b8688e1ec9926abc909b896ea49038 [log] [tgz]
author: Artem Belevich <tra@google.com> Wed Apr 18 21:51:48 2018 +0000
committer: Artem Belevich <tra@google.com> Wed Apr 18 21:51:48 2018 +0000
tree: 3c803aae33ad4fd575d7d3672138519c7b20e0fc
parent: c310bfa19397e15903a8f5386b51366aade414b9 [diff] [blame]
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index fc74a8e..fdd62fe 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp

@@ -622,17 +622,19 @@
   CC1Args.push_back("-mlink-cuda-bitcode");
   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
 
-  if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
-    // CUDA-9 uses new instructions that are only available in PTX6.0
-    CC1Args.push_back("-target-feature");
-    CC1Args.push_back("+ptx60");
-  } else {
-    // Libdevice in CUDA-7.0 requires PTX version that's more recent
-    // than LLVM defaults to. Use PTX4.2 which is the PTX version that
-    // came with CUDA-7.0.
-    CC1Args.push_back("-target-feature");
-    CC1Args.push_back("+ptx42");
+  // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
+  // defaults to. Use PTX4.2 by default, which is the PTX version that came with
+  // CUDA-7.0.
+  const char *PtxFeature = "+ptx42";
+  if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
+    // CUDA-9.1 uses new instructions that are only available in PTX6.1+
+    PtxFeature = "+ptx61";
+  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
+    // CUDA-9.0 uses new instructions that are only available in PTX6.0+
+    PtxFeature = "+ptx60";
   }
+  CC1Args.push_back("-target-feature");
+  CC1Args.push_back(PtxFeature);
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
     SmallVector<StringRef, 8> LibraryPaths;
commit	0ae8590354b8688e1ec9926abc909b896ea49038	[log] [tgz]
author	Artem Belevich <tra@google.com>	Wed Apr 18 21:51:48 2018 +0000
committer	Artem Belevich <tra@google.com>	Wed Apr 18 21:51:48 2018 +0000
tree	3c803aae33ad4fd575d7d3672138519c7b20e0fc
parent	c310bfa19397e15903a8f5386b51366aade414b9 [diff] [blame]