[OpenMP] Add flag for specifying the target device architecture for OpenMP device offloading. Summary: OpenMP has the ability to offload target regions to devices which may have different architectures. A new -fopenmp-target-arch flag is introduced to specify the device architecture. In this patch, I use the new flag to specify the compute capability of the underlying NVIDIA architecture for the OpenMP offloading CUDA tool chain. Only a host-offloading test is provided, since full device offloading capability will only be available when D29654 (https://reviews.llvm.org/D29654) lands. Reviewers: hfinkel, Hahnfeld, carlo.bertolli, caomhin, ABataev. Reviewed By: hfinkel. Subscribers: guansong, cfe-commits. Tags: #openmp. Differential Revision: https://reviews.llvm.org/D34784. llvm-svn: 310263

commit: 47e0cf378c793b00207998a0537c2fd75bd1ec74 [log] [tgz]
author: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> Mon Aug 07 15:39:11 2017 +0000
committer: Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> Mon Aug 07 15:39:11 2017 +0000
tree: 3f667d95b0f1bf3a9a5009c9c8a568374d48cfb2
parent: 02d9945e6f11959887e2eefbbb5a635ff5087dbe [diff] [blame]
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 935a5a3..86be187 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp

@@ -212,8 +212,18 @@
       static_cast<const toolchains::CudaToolChain &>(getToolChain());
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
 
+  StringRef GPUArchName;
+  // If this is an OpenMP action we need to extract the device architecture
+  // from the -march=arch option. This option may come from -Xopenmp-target
+  // flag or the default value.
+  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+    GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
+    assert(!GPUArchName.empty() && "Must have an architecture passed in.");
+  } else
+    GPUArchName = JA.getOffloadingArch();
+
   // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
   assert(gpu_arch != CudaArch::UNKNOWN &&
          "Device action expected to have an architecture.");
 
@@ -405,7 +415,7 @@
 
   // For OpenMP device offloading, append derived arguments. Make sure
   // flags are not duplicated.
-  // TODO: Append the compute capability.
+  // Also append the compute capability.
   if (DeviceOffloadKind == Action::OFK_OpenMP) {
     for (Arg *A : Args){
       bool IsDuplicate = false;
@@ -418,6 +428,13 @@
       if (!IsDuplicate)
         DAL->append(A);
     }
+
+    StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
+    if (Arch.empty())
+      // Default compute capability for CUDA toolchain is sm_20.
+      DAL->AddJoinedArg(nullptr,
+          Opts.getOption(options::OPT_march_EQ), "sm_20");
+
     return DAL;
   }
commit	47e0cf378c793b00207998a0537c2fd75bd1ec74	[log] [tgz]
author	Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>	Mon Aug 07 15:39:11 2017 +0000
committer	Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com>	Mon Aug 07 15:39:11 2017 +0000
tree	3f667d95b0f1bf3a9a5009c9c8a568374d48cfb2
parent	02d9945e6f11959887e2eefbbb5a635ff5087dbe [diff] [blame]