[CMake][OpenMP] Customize default offloading arch
The shuffle instructions used in reductions require at least sm_30,
but the user may want to customize the default architecture.
Differential Revision: https://reviews.llvm.org/D38883
llvm-svn: 315996
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 4d040a2..4f740fc 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -542,9 +542,9 @@
// flags are not duplicated.
// Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
- for (Arg *A : Args){
+ for (Arg *A : Args) {
bool IsDuplicate = false;
- for (Arg *DALArg : *DAL){
+ for (Arg *DALArg : *DAL) {
if (A == DALArg) {
IsDuplicate = true;
break;
@@ -555,14 +555,9 @@
}
StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
- if (Arch.empty()) {
- // Default compute capability for CUDA toolchain is the
- // lowest compute capability supported by the installed
- // CUDA version.
- DAL->AddJoinedArg(nullptr,
- Opts.getOption(options::OPT_march_EQ),
- CudaInstallation.getLowestExistingArch());
- }
+ if (Arch.empty())
+ DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+ CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
return DAL;
}
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 5144f5b..1e30aa7 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -76,17 +76,6 @@
std::string getLibDeviceFile(StringRef Gpu) const {
return LibDeviceMap.lookup(Gpu);
}
- /// \brief Get lowest available compute capability
- /// for which a libdevice library exists.
- std::string getLowestExistingArch() const {
- std::string LibDeviceFile;
- for (auto key : LibDeviceMap.keys()) {
- LibDeviceFile = LibDeviceMap.lookup(key);
- if (!LibDeviceFile.empty())
- return key;
- }
- return "sm_20";
- }
};
namespace tools {