Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: 49a23028659d74610ad5c6e4acaa361534f5b042 [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
				11	#include "InputInfo.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	12	#include "CommonArgs.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	14	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	15	#include "clang/Basic/VirtualFileSystem.h"
				16	#include "clang/Driver/Compilation.h"
				17	#include "clang/Driver/Driver.h"
				18	#include "clang/Driver/DriverDiagnostic.h"
				19	#include "clang/Driver/Options.h"
				20	#include "llvm/Option/ArgList.h"
				21	#include "llvm/Support/Path.h"
				22	#include <system_error>
				23
				24	using namespace clang::driver;
				25	using namespace clang::driver::toolchains;
				26	using namespace clang::driver::tools;
				27	using namespace clang;
				28	using namespace llvm::opt;
				29
				30	// Parses the contents of version.txt in an CUDA installation. It should
				31	// contain one line of the from e.g. "CUDA Version 7.5.2".
				32	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				33	if (!V.startswith("CUDA Version "))
				34	return CudaVersion::UNKNOWN;
				35	V = V.substr(strlen("CUDA Version "));
				36	int Major = -1, Minor = -1;
				37	auto First = V.split('.');
				38	auto Second = First.second.split('.');
				39	if (First.first.getAsInteger(10, Major) \|\|
				40	Second.first.getAsInteger(10, Minor))
				41	return CudaVersion::UNKNOWN;
				42
				43	if (Major == 7 && Minor == 0) {
				44	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				45	// CUDA 7 installs I've seen. But no harm in checking.
				46	return CudaVersion::CUDA_70;
				47	}
				48	if (Major == 7 && Minor == 5)
				49	return CudaVersion::CUDA_75;
				50	if (Major == 8 && Minor == 0)
				51	return CudaVersion::CUDA_80;
				52	return CudaVersion::UNKNOWN;
				53	}
				54
				55	CudaInstallationDetector::CudaInstallationDetector(
				56	const Driver &D, const llvm::Triple &HostTriple,
				57	const llvm::opt::ArgList &Args)
				58	: D(D) {
				59	SmallVector<std::string, 4> CudaPathCandidates;
				60
				61	// In decreasing order so we prefer newer versions to older versions.
				62	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				63
				64	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
				65	CudaPathCandidates.push_back(
				66	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
				67	} else if (HostTriple.isOSWindows()) {
				68	for (const char *Ver : Versions)
				69	CudaPathCandidates.push_back(
				70	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				71	Ver);
				72	} else {
				73	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
				74	for (const char *Ver : Versions)
				75	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
				76	}
				77
				78	for (const auto &CudaPath : CudaPathCandidates) {
				79	if (CudaPath.empty() \|\| !D.getVFS().exists(CudaPath))
				80	continue;
				81
				82	InstallPath = CudaPath;
				83	BinPath = CudaPath + "/bin";
				84	IncludePath = InstallPath + "/include";
				85	LibDevicePath = InstallPath + "/nvvm/libdevice";
				86
				87	auto &FS = D.getVFS();
				88	if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
				89	FS.exists(LibDevicePath)))
				90	continue;
				91
				92	// On Linux, we have both lib and lib64 directories, and we need to choose
				93	// based on our triple. On MacOS, we have only a lib directory.
				94	//
				95	// It's sufficient for our purposes to be flexible: If both lib and lib64
				96	// exist, we choose whichever one matches our triple. Otherwise, if only
				97	// lib exists, we use it.
				98	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				99	LibPath = InstallPath + "/lib64";
				100	else if (FS.exists(InstallPath + "/lib"))
				101	LibPath = InstallPath + "/lib";
				102	else
				103	continue;
				104
				105	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				106	FS.getBufferForFile(InstallPath + "/version.txt");
				107	if (!VersionFile) {
				108	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				109	// version.txt isn't present.
				110	Version = CudaVersion::CUDA_70;
				111	} else {
				112	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				113	}
				114
				115	std::error_code EC;
				116	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				117	!EC && LI != LE; LI = LI.increment(EC)) {
				118	StringRef FilePath = LI->path();
				119	StringRef FileName = llvm::sys::path::filename(FilePath);
				120	// Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
				121	const StringRef LibDeviceName = "libdevice.";
				122	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				123	continue;
				124	StringRef GpuArch = FileName.slice(
				125	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				126	LibDeviceMap[GpuArch] = FilePath.str();
				127	// Insert map entries for specifc devices with this compute
				128	// capability. NVCC's choice of the libdevice library version is
				129	// rather peculiar and depends on the CUDA version.
				130	if (GpuArch == "compute_20") {
				131	LibDeviceMap["sm_20"] = FilePath;
				132	LibDeviceMap["sm_21"] = FilePath;
				133	LibDeviceMap["sm_32"] = FilePath;
				134	} else if (GpuArch == "compute_30") {
				135	LibDeviceMap["sm_30"] = FilePath;
				136	if (Version < CudaVersion::CUDA_80) {
				137	LibDeviceMap["sm_50"] = FilePath;
				138	LibDeviceMap["sm_52"] = FilePath;
				139	LibDeviceMap["sm_53"] = FilePath;
				140	}
				141	LibDeviceMap["sm_60"] = FilePath;
				142	LibDeviceMap["sm_61"] = FilePath;
				143	LibDeviceMap["sm_62"] = FilePath;
				144	} else if (GpuArch == "compute_35") {
				145	LibDeviceMap["sm_35"] = FilePath;
				146	LibDeviceMap["sm_37"] = FilePath;
				147	} else if (GpuArch == "compute_50") {
				148	if (Version >= CudaVersion::CUDA_80) {
				149	LibDeviceMap["sm_50"] = FilePath;
				150	LibDeviceMap["sm_52"] = FilePath;
				151	LibDeviceMap["sm_53"] = FilePath;
				152	}
				153	}
				154	}
				155
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame^]	156	// This code prevents IsValid from being set when
				157	// no libdevice has been found.
				158	bool allEmpty = true;
				159	std::string LibDeviceFile;
				160	for (auto key : LibDeviceMap.keys()) {
				161	LibDeviceFile = LibDeviceMap.lookup(key);
				162	if (!LibDeviceFile.empty())
				163	allEmpty = false;
				164	}
				165
				166	if (allEmpty)
				167	continue;
				168
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	169	IsValid = true;
				170	break;
				171	}
				172	}
				173
				174	void CudaInstallationDetector::AddCudaIncludeArgs(
				175	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				176	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				177	// Add cuda_wrappers/* to our system include path. This lets us wrap
				178	// standard library headers.
				179	SmallString<128> P(D.ResourceDir);
				180	llvm::sys::path::append(P, "include");
				181	llvm::sys::path::append(P, "cuda_wrappers");
				182	CC1Args.push_back("-internal-isystem");
				183	CC1Args.push_back(DriverArgs.MakeArgString(P));
				184	}
				185
				186	if (DriverArgs.hasArg(options::OPT_nocudainc))
				187	return;
				188
				189	if (!isValid()) {
				190	D.Diag(diag::err_drv_no_cuda_installation);
				191	return;
				192	}
				193
				194	CC1Args.push_back("-internal-isystem");
				195	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				196	CC1Args.push_back("-include");
				197	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				198	}
				199
				200	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				201	CudaArch Arch) const {
				202	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
				203	ArchsWithVersionTooLowErrors.count(Arch) > 0)
				204	return;
				205
				206	auto RequiredVersion = MinVersionForCudaArch(Arch);
				207	if (Version < RequiredVersion) {
				208	ArchsWithVersionTooLowErrors.insert(Arch);
				209	D.Diag(diag::err_drv_cuda_version_too_low)
				210	<< InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
				211	<< CudaVersionToString(RequiredVersion);
				212	}
				213	}
				214
				215	void CudaInstallationDetector::print(raw_ostream &OS) const {
				216	if (isValid())
				217	OS << "Found CUDA installation: " << InstallPath << ", version "
				218	<< CudaVersionToString(Version) << "\n";
				219	}
				220
				221	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				222	const InputInfo &Output,
				223	const InputInfoList &Inputs,
				224	const ArgList &Args,
				225	const char *LinkingOutput) const {
				226	const auto &TC =
				227	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				228	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				229
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	230	StringRef GPUArchName;
				231	// If this is an OpenMP action we need to extract the device architecture
				232	// from the -march=arch option. This option may come from -Xopenmp-target
				233	// flag or the default value.
				234	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				235	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				236	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				237	} else
				238	GPUArchName = JA.getOffloadingArch();
				239
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	240	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	241	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	242	assert(gpu_arch != CudaArch::UNKNOWN &&
				243	"Device action expected to have an architecture.");
				244
				245	// Check that our installation's ptxas supports gpu_arch.
				246	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				247	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				248	}
				249
				250	ArgStringList CmdArgs;
				251	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				252	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				253	options::OPT_no_cuda_noopt_device_debug, false)) {
				254	// ptxas does not accept -g option if optimization is enabled, so
				255	// we ignore the compiler's -O* options if we want debug info.
				256	CmdArgs.push_back("-g");
				257	CmdArgs.push_back("--dont-merge-basicblocks");
				258	CmdArgs.push_back("--return-at-end");
				259	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				260	// Map the -O we received to -O{0,1,2,3}.
				261	//
				262	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				263	// default, so it may correspond more closely to the spirit of clang -O2.
				264
				265	// -O3 seems like the least-bad option when -Osomething is specified to
				266	// clang but it isn't handled below.
				267	StringRef OOpt = "3";
				268	if (A->getOption().matches(options::OPT_O4) \|\|
				269	A->getOption().matches(options::OPT_Ofast))
				270	OOpt = "3";
				271	else if (A->getOption().matches(options::OPT_O0))
				272	OOpt = "0";
				273	else if (A->getOption().matches(options::OPT_O)) {
				274	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				275	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				276	.Case("1", "1")
				277	.Case("2", "2")
				278	.Case("3", "3")
				279	.Case("s", "2")
				280	.Case("z", "2")
				281	.Default("2");
				282	}
				283	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				284	} else {
				285	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				286	// to no optimizations, but ptxas's default is -O3.
				287	CmdArgs.push_back("-O0");
				288	}
				289
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	290	// Pass -v to ptxas if it was passed to the driver.
				291	if (Args.hasArg(options::OPT_v))
				292	CmdArgs.push_back("-v");
				293
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	294	CmdArgs.push_back("--gpu-name");
				295	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				296	CmdArgs.push_back("--output-file");
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	297	SmallString<256> OutputFileName(Output.getFilename());
				298	if (JA.isOffloading(Action::OFK_OpenMP))
				299	llvm::sys::path::replace_extension(OutputFileName, "cubin");
				300	CmdArgs.push_back(Args.MakeArgString(OutputFileName));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	301	for (const auto& II : Inputs)
				302	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				303
				304	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				305	CmdArgs.push_back(Args.MakeArgString(A));
				306
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	307	// In OpenMP we need to generate relocatable code.
Gheorghe-Teodor Bercea	0846582	2017-08-09 15:27:39 +0000	[diff] [blame]	308	if (JA.isOffloading(Action::OFK_OpenMP) &&
				309	Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				310	options::OPT_fnoopenmp_relocatable_target,
				311	/Default=/ true))
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	312	CmdArgs.push_back("-c");
				313
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	314	const char *Exec;
				315	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				316	Exec = A->getValue();
				317	else
				318	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				319	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				320	}
				321
				322	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				323	// at the Inputs' Actions in order to figure out which GPU architecture they
				324	// correspond to.
				325	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				326	const InputInfo &Output,
				327	const InputInfoList &Inputs,
				328	const ArgList &Args,
				329	const char *LinkingOutput) const {
				330	const auto &TC =
				331	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				332	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				333
				334	ArgStringList CmdArgs;
				335	CmdArgs.push_back("--cuda");
				336	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				337	CmdArgs.push_back(Args.MakeArgString("--create"));
				338	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				339
				340	for (const auto& II : Inputs) {
				341	auto *A = II.getAction();
				342	assert(A->getInputs().size() == 1 &&
				343	"Device offload action is expected to have a single input");
				344	const char *gpu_arch_str = A->getOffloadingArch();
				345	assert(gpu_arch_str &&
				346	"Device action expected to have associated a GPU architecture!");
				347	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				348
				349	// We need to pass an Arch of the form "sm_XX" for cubin files and
				350	// "compute_XX" for ptx.
				351	const char *Arch =
				352	(II.getType() == types::TY_PP_Asm)
				353	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				354	: gpu_arch_str;
				355	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				356	Arch + ",file=" + II.getFilename()));
				357	}
				358
				359	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				360	CmdArgs.push_back(Args.MakeArgString(A));
				361
				362	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				363	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				364	}
				365
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	366	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				367	const InputInfo &Output,
				368	const InputInfoList &Inputs,
				369	const ArgList &Args,
				370	const char *LinkingOutput) const {
				371	const auto &TC =
				372	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				373	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				374
				375	ArgStringList CmdArgs;
				376
				377	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				378	// host binary by the host linker.
				379	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				380	"CUDA toolchain not expected for an OpenMP host device.");
				381
				382	if (Output.isFilename()) {
				383	CmdArgs.push_back("-o");
				384	CmdArgs.push_back(Output.getFilename());
				385	} else
				386	assert(Output.isNothing() && "Invalid output.");
				387	if (Args.hasArg(options::OPT_g_Flag))
				388	CmdArgs.push_back("-g");
				389
				390	if (Args.hasArg(options::OPT_v))
				391	CmdArgs.push_back("-v");
				392
				393	StringRef GPUArch =
				394	Args.getLastArgValue(options::OPT_march_EQ);
				395	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				396
				397	CmdArgs.push_back("-arch");
				398	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				399
				400	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				401	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				402
				403	// Add paths for the default clang library path.
				404	SmallString<256> DefaultLibPath =
				405	llvm::sys::path::parent_path(TC.getDriver().Dir);
				406	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				407	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				408
				409	// Add linking against library implementing OpenMP calls on NVPTX target.
				410	CmdArgs.push_back("-lomptarget-nvptx");
				411
				412	for (const auto &II : Inputs) {
				413	if (II.getType() == types::TY_LLVM_IR \|\|
				414	II.getType() == types::TY_LTO_IR \|\|
				415	II.getType() == types::TY_LTO_BC \|\|
				416	II.getType() == types::TY_LLVM_BC) {
				417	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				418	<< getToolChain().getTripleString();
				419	continue;
				420	}
				421
				422	// Currently, we only pass the input files to the linker, we do not pass
				423	// any libraries that may be valid only for the host.
				424	if (!II.isFilename())
				425	continue;
				426
				427	SmallString<256> Name = llvm::sys::path::filename(II.getFilename());
				428	llvm::sys::path::replace_extension(Name, "cubin");
				429
				430	const char *CubinF =
				431	C.addTempFile(C.getArgs().MakeArgString(Name));
				432
				433	CmdArgs.push_back(CubinF);
				434	}
				435
				436	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				437
				438	const char *Exec =
				439	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				440	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				441	}
				442
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	443	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				444	/// which isn't properly a linker but nonetheless performs the step of stitching
				445	/// together object files from the assembler into a single blob.
				446
				447	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	448	const ToolChain &HostTC, const ArgList &Args,
				449	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	450	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	451	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	452	if (CudaInstallation.isValid())
				453	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	454	// Lookup binaries into the driver directory, this is used to
				455	// discover the clang-offload-bundler executable.
				456	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	457	}
				458
				459	void CudaToolChain::addClangTargetOptions(
				460	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	461	llvm::opt::ArgStringList &CC1Args,
				462	Action::OffloadKind DeviceOffloadingKind) const {
				463	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	464
				465	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				466	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	467	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				468	DeviceOffloadingKind == Action::OFK_Cuda) &&
				469	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				470
				471	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				472	CC1Args.push_back("-fcuda-is-device");
				473
				474	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				475	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				476	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				477
				478	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				479	options::OPT_fno_cuda_approx_transcendentals, false))
				480	CC1Args.push_back("-fcuda-approx-transcendentals");
				481
				482	if (DriverArgs.hasArg(options::OPT_nocudalib))
				483	return;
				484	}
				485
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	486	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				487
				488	if (LibDeviceFile.empty()) {
				489	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				490	return;
				491	}
				492
				493	CC1Args.push_back("-mlink-cuda-bitcode");
				494	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				495
				496	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				497	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				498	// came with CUDA-7.0.
				499	CC1Args.push_back("-target-feature");
Alex Lorenz	994f231	2017-08-10 10:34:46 +0000	[diff] [blame]	500	CC1Args.push_back("+ptx42");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	501	}
				502
				503	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				504	ArgStringList &CC1Args) const {
				505	// Check our CUDA version if we're going to include the CUDA headers.
				506	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				507	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				508	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				509	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				510	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				511	}
				512	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				513	}
				514
				515	llvm::opt::DerivedArgList *
				516	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				517	StringRef BoundArch,
				518	Action::OffloadKind DeviceOffloadKind) const {
				519	DerivedArgList *DAL =
				520	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				521	if (!DAL)
				522	DAL = new DerivedArgList(Args.getBaseArgs());
				523
				524	const OptTable &Opts = getDriver().getOpts();
				525
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	526	// For OpenMP device offloading, append derived arguments. Make sure
				527	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	528	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	529	if (DeviceOffloadKind == Action::OFK_OpenMP) {
				530	for (Arg *A : Args){
				531	bool IsDuplicate = false;
				532	for (Arg DALArg : DAL){
				533	if (A == DALArg) {
				534	IsDuplicate = true;
				535	break;
				536	}
				537	}
				538	if (!IsDuplicate)
				539	DAL->append(A);
				540	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	541
				542	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Gheorghe-Teodor Bercea	14528c6	2017-08-10 16:56:59 +0000	[diff] [blame]	543	if (Arch.empty()) {
				544	// Default compute capability for CUDA toolchain is the
				545	// lowest compute capability supported by the installed
				546	// CUDA version.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	547	DAL->AddJoinedArg(nullptr,
Gheorghe-Teodor Bercea	14528c6	2017-08-10 16:56:59 +0000	[diff] [blame]	548	Opts.getOption(options::OPT_march_EQ),
				549	CudaInstallation.getLowestExistingArch());
				550	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	551
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	552	return DAL;
				553	}
				554
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	555	for (Arg *A : Args) {
				556	if (A->getOption().matches(options::OPT_Xarch__)) {
				557	// Skip this argument unless the architecture matches BoundArch
				558	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				559	continue;
				560
				561	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				562	unsigned Prev = Index;
				563	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				564
				565	// If the argument parsing failed or more than one argument was
				566	// consumed, the -Xarch_ argument's parameter tried to consume
				567	// extra arguments. Emit an error and ignore.
				568	//
				569	// We also want to disallow any options which would alter the
				570	// driver behavior; that isn't going to work in our model. We
				571	// use isDriverOption() as an approximation, although things
				572	// like -O4 are going to slip through.
				573	if (!XarchArg \|\| Index > Prev + 1) {
				574	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				575	<< A->getAsString(Args);
				576	continue;
				577	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				578	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				579	<< A->getAsString(Args);
				580	continue;
				581	}
				582	XarchArg->setBaseArg(A);
				583	A = XarchArg.release();
				584	DAL->AddSynthesizedArg(A);
				585	}
				586	DAL->append(A);
				587	}
				588
				589	if (!BoundArch.empty()) {
				590	DAL->eraseArg(options::OPT_march_EQ);
				591	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				592	}
				593	return DAL;
				594	}
				595
				596	Tool *CudaToolChain::buildAssembler() const {
				597	return new tools::NVPTX::Assembler(*this);
				598	}
				599
				600	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	601	if (OK == Action::OFK_OpenMP)
				602	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	603	return new tools::NVPTX::Linker(*this);
				604	}
				605
				606	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				607	HostTC.addClangWarningOptions(CC1Args);
				608	}
				609
				610	ToolChain::CXXStdlibType
				611	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				612	return HostTC.GetCXXStdlibType(Args);
				613	}
				614
				615	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				616	ArgStringList &CC1Args) const {
				617	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				618	}
				619
				620	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				621	ArgStringList &CC1Args) const {
				622	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				623	}
				624
				625	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				626	ArgStringList &CC1Args) const {
				627	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				628	}
				629
				630	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				631	// The CudaToolChain only supports sanitizers in the sense that it allows
				632	// sanitizer arguments on the command line if they are supported by the host
				633	// toolchain. The CudaToolChain will actually ignore any command line
				634	// arguments for any of these "supported" sanitizers. That means that no
				635	// sanitization of device code is actually supported at this time.
				636	//
				637	// This behavior is necessary because the host and device toolchains
				638	// invocations often share the command line, so the device toolchain must
				639	// tolerate flags meant only for the host toolchain.
				640	return HostTC.getSupportedSanitizers();
				641	}
				642
				643	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				644	const ArgList &Args) const {
				645	return HostTC.computeMSVCVersion(D, Args);
				646	}