Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: 44ec16e8b86dca1a3a1f878c0aae0966cbc2f5f0 [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
				11	#include "InputInfo.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	12	#include "CommonArgs.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	14	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	15	#include "clang/Basic/VirtualFileSystem.h"
				16	#include "clang/Driver/Compilation.h"
				17	#include "clang/Driver/Driver.h"
				18	#include "clang/Driver/DriverDiagnostic.h"
				19	#include "clang/Driver/Options.h"
				20	#include "llvm/Option/ArgList.h"
				21	#include "llvm/Support/Path.h"
				22	#include <system_error>
				23
				24	using namespace clang::driver;
				25	using namespace clang::driver::toolchains;
				26	using namespace clang::driver::tools;
				27	using namespace clang;
				28	using namespace llvm::opt;
				29
				30	// Parses the contents of version.txt in an CUDA installation. It should
				31	// contain one line of the from e.g. "CUDA Version 7.5.2".
				32	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				33	if (!V.startswith("CUDA Version "))
				34	return CudaVersion::UNKNOWN;
				35	V = V.substr(strlen("CUDA Version "));
				36	int Major = -1, Minor = -1;
				37	auto First = V.split('.');
				38	auto Second = First.second.split('.');
				39	if (First.first.getAsInteger(10, Major) \|\|
				40	Second.first.getAsInteger(10, Minor))
				41	return CudaVersion::UNKNOWN;
				42
				43	if (Major == 7 && Minor == 0) {
				44	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				45	// CUDA 7 installs I've seen. But no harm in checking.
				46	return CudaVersion::CUDA_70;
				47	}
				48	if (Major == 7 && Minor == 5)
				49	return CudaVersion::CUDA_75;
				50	if (Major == 8 && Minor == 0)
				51	return CudaVersion::CUDA_80;
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	52	if (Major == 9 && Minor == 0)
				53	return CudaVersion::CUDA_90;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	54	return CudaVersion::UNKNOWN;
				55	}
				56
				57	CudaInstallationDetector::CudaInstallationDetector(
				58	const Driver &D, const llvm::Triple &HostTriple,
				59	const llvm::opt::ArgList &Args)
				60	: D(D) {
				61	SmallVector<std::string, 4> CudaPathCandidates;
				62
				63	// In decreasing order so we prefer newer versions to older versions.
				64	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				65
				66	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
				67	CudaPathCandidates.push_back(
				68	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
				69	} else if (HostTriple.isOSWindows()) {
				70	for (const char *Ver : Versions)
				71	CudaPathCandidates.push_back(
				72	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				73	Ver);
				74	} else {
				75	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
				76	for (const char *Ver : Versions)
				77	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
				78	}
				79
				80	for (const auto &CudaPath : CudaPathCandidates) {
				81	if (CudaPath.empty() \|\| !D.getVFS().exists(CudaPath))
				82	continue;
				83
				84	InstallPath = CudaPath;
				85	BinPath = CudaPath + "/bin";
				86	IncludePath = InstallPath + "/include";
				87	LibDevicePath = InstallPath + "/nvvm/libdevice";
				88
				89	auto &FS = D.getVFS();
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	90	if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	91	continue;
				92
				93	// On Linux, we have both lib and lib64 directories, and we need to choose
				94	// based on our triple. On MacOS, we have only a lib directory.
				95	//
				96	// It's sufficient for our purposes to be flexible: If both lib and lib64
				97	// exist, we choose whichever one matches our triple. Otherwise, if only
				98	// lib exists, we use it.
				99	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				100	LibPath = InstallPath + "/lib64";
				101	else if (FS.exists(InstallPath + "/lib"))
				102	LibPath = InstallPath + "/lib";
				103	else
				104	continue;
				105
				106	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				107	FS.getBufferForFile(InstallPath + "/version.txt");
				108	if (!VersionFile) {
				109	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				110	// version.txt isn't present.
				111	Version = CudaVersion::CUDA_70;
				112	} else {
				113	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				114	}
				115
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	116	if (Version == CudaVersion::CUDA_90) {
				117	// CUDA-9 uses single libdevice file for all GPU variants.
				118	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
				119	if (FS.exists(FilePath)) {
				120	for (const char *GpuArch :
				121	{"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
				122	"sm_60", "sm_61", "sm_62", "sm_70"})
				123	LibDeviceMap[GpuArch] = FilePath;
				124	}
				125	} else {
				126	std::error_code EC;
				127	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				128	!EC && LI != LE; LI = LI.increment(EC)) {
				129	StringRef FilePath = LI->path();
				130	StringRef FileName = llvm::sys::path::filename(FilePath);
				131	// Process all bitcode filenames that look like
				132	// libdevice.compute_XX.YY.bc
				133	const StringRef LibDeviceName = "libdevice.";
				134	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				135	continue;
				136	StringRef GpuArch = FileName.slice(
				137	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				138	LibDeviceMap[GpuArch] = FilePath.str();
				139	// Insert map entries for specifc devices with this compute
				140	// capability. NVCC's choice of the libdevice library version is
				141	// rather peculiar and depends on the CUDA version.
				142	if (GpuArch == "compute_20") {
				143	LibDeviceMap["sm_20"] = FilePath;
				144	LibDeviceMap["sm_21"] = FilePath;
				145	LibDeviceMap["sm_32"] = FilePath;
				146	} else if (GpuArch == "compute_30") {
				147	LibDeviceMap["sm_30"] = FilePath;
				148	if (Version < CudaVersion::CUDA_80) {
				149	LibDeviceMap["sm_50"] = FilePath;
				150	LibDeviceMap["sm_52"] = FilePath;
				151	LibDeviceMap["sm_53"] = FilePath;
				152	}
				153	LibDeviceMap["sm_60"] = FilePath;
				154	LibDeviceMap["sm_61"] = FilePath;
				155	LibDeviceMap["sm_62"] = FilePath;
				156	} else if (GpuArch == "compute_35") {
				157	LibDeviceMap["sm_35"] = FilePath;
				158	LibDeviceMap["sm_37"] = FilePath;
				159	} else if (GpuArch == "compute_50") {
				160	if (Version >= CudaVersion::CUDA_80) {
				161	LibDeviceMap["sm_50"] = FilePath;
				162	LibDeviceMap["sm_52"] = FilePath;
				163	LibDeviceMap["sm_53"] = FilePath;
				164	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	165	}
				166	}
				167	}
				168
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	169	// Check that we have found at least one libdevice that we can link in if
				170	// -nocudalib hasn't been specified.
				171	if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame]	172	continue;
				173
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	174	IsValid = true;
				175	break;
				176	}
				177	}
				178
				179	void CudaInstallationDetector::AddCudaIncludeArgs(
				180	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				181	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				182	// Add cuda_wrappers/* to our system include path. This lets us wrap
				183	// standard library headers.
				184	SmallString<128> P(D.ResourceDir);
				185	llvm::sys::path::append(P, "include");
				186	llvm::sys::path::append(P, "cuda_wrappers");
				187	CC1Args.push_back("-internal-isystem");
				188	CC1Args.push_back(DriverArgs.MakeArgString(P));
				189	}
				190
				191	if (DriverArgs.hasArg(options::OPT_nocudainc))
				192	return;
				193
				194	if (!isValid()) {
				195	D.Diag(diag::err_drv_no_cuda_installation);
				196	return;
				197	}
				198
				199	CC1Args.push_back("-internal-isystem");
				200	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				201	CC1Args.push_back("-include");
				202	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				203	}
				204
				205	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				206	CudaArch Arch) const {
				207	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	208	ArchsWithBadVersion.count(Arch) > 0)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	209	return;
				210
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	211	auto MinVersion = MinVersionForCudaArch(Arch);
				212	auto MaxVersion = MaxVersionForCudaArch(Arch);
				213	if (Version < MinVersion \|\| Version > MaxVersion) {
				214	ArchsWithBadVersion.insert(Arch);
				215	D.Diag(diag::err_drv_cuda_version_unsupported)
				216	<< CudaArchToString(Arch) << CudaVersionToString(MinVersion)
				217	<< CudaVersionToString(MaxVersion) << InstallPath
				218	<< CudaVersionToString(Version);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	219	}
				220	}
				221
				222	void CudaInstallationDetector::print(raw_ostream &OS) const {
				223	if (isValid())
				224	OS << "Found CUDA installation: " << InstallPath << ", version "
				225	<< CudaVersionToString(Version) << "\n";
				226	}
				227
				228	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				229	const InputInfo &Output,
				230	const InputInfoList &Inputs,
				231	const ArgList &Args,
				232	const char *LinkingOutput) const {
				233	const auto &TC =
				234	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				235	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				236
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	237	StringRef GPUArchName;
				238	// If this is an OpenMP action we need to extract the device architecture
				239	// from the -march=arch option. This option may come from -Xopenmp-target
				240	// flag or the default value.
				241	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				242	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				243	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				244	} else
				245	GPUArchName = JA.getOffloadingArch();
				246
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	247	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	248	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	249	assert(gpu_arch != CudaArch::UNKNOWN &&
				250	"Device action expected to have an architecture.");
				251
				252	// Check that our installation's ptxas supports gpu_arch.
				253	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				254	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				255	}
				256
				257	ArgStringList CmdArgs;
				258	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				259	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				260	options::OPT_no_cuda_noopt_device_debug, false)) {
				261	// ptxas does not accept -g option if optimization is enabled, so
				262	// we ignore the compiler's -O* options if we want debug info.
				263	CmdArgs.push_back("-g");
				264	CmdArgs.push_back("--dont-merge-basicblocks");
				265	CmdArgs.push_back("--return-at-end");
				266	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				267	// Map the -O we received to -O{0,1,2,3}.
				268	//
				269	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				270	// default, so it may correspond more closely to the spirit of clang -O2.
				271
				272	// -O3 seems like the least-bad option when -Osomething is specified to
				273	// clang but it isn't handled below.
				274	StringRef OOpt = "3";
				275	if (A->getOption().matches(options::OPT_O4) \|\|
				276	A->getOption().matches(options::OPT_Ofast))
				277	OOpt = "3";
				278	else if (A->getOption().matches(options::OPT_O0))
				279	OOpt = "0";
				280	else if (A->getOption().matches(options::OPT_O)) {
				281	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				282	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				283	.Case("1", "1")
				284	.Case("2", "2")
				285	.Case("3", "3")
				286	.Case("s", "2")
				287	.Case("z", "2")
				288	.Default("2");
				289	}
				290	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				291	} else {
				292	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				293	// to no optimizations, but ptxas's default is -O3.
				294	CmdArgs.push_back("-O0");
				295	}
				296
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	297	// Pass -v to ptxas if it was passed to the driver.
				298	if (Args.hasArg(options::OPT_v))
				299	CmdArgs.push_back("-v");
				300
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	301	CmdArgs.push_back("--gpu-name");
				302	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				303	CmdArgs.push_back("--output-file");
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	304	SmallString<256> OutputFileName(Output.getFilename());
				305	if (JA.isOffloading(Action::OFK_OpenMP))
				306	llvm::sys::path::replace_extension(OutputFileName, "cubin");
				307	CmdArgs.push_back(Args.MakeArgString(OutputFileName));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	308	for (const auto& II : Inputs)
				309	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				310
				311	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				312	CmdArgs.push_back(Args.MakeArgString(A));
				313
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	314	// In OpenMP we need to generate relocatable code.
Gheorghe-Teodor Bercea	0846582	2017-08-09 15:27:39 +0000	[diff] [blame]	315	if (JA.isOffloading(Action::OFK_OpenMP) &&
				316	Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				317	options::OPT_fnoopenmp_relocatable_target,
				318	/Default=/ true))
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	319	CmdArgs.push_back("-c");
				320
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	321	const char *Exec;
				322	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				323	Exec = A->getValue();
				324	else
				325	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				326	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				327	}
				328
				329	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				330	// at the Inputs' Actions in order to figure out which GPU architecture they
				331	// correspond to.
				332	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				333	const InputInfo &Output,
				334	const InputInfoList &Inputs,
				335	const ArgList &Args,
				336	const char *LinkingOutput) const {
				337	const auto &TC =
				338	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				339	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				340
				341	ArgStringList CmdArgs;
				342	CmdArgs.push_back("--cuda");
				343	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				344	CmdArgs.push_back(Args.MakeArgString("--create"));
				345	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				346
				347	for (const auto& II : Inputs) {
				348	auto *A = II.getAction();
				349	assert(A->getInputs().size() == 1 &&
				350	"Device offload action is expected to have a single input");
				351	const char *gpu_arch_str = A->getOffloadingArch();
				352	assert(gpu_arch_str &&
				353	"Device action expected to have associated a GPU architecture!");
				354	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				355
				356	// We need to pass an Arch of the form "sm_XX" for cubin files and
				357	// "compute_XX" for ptx.
				358	const char *Arch =
				359	(II.getType() == types::TY_PP_Asm)
				360	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				361	: gpu_arch_str;
				362	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				363	Arch + ",file=" + II.getFilename()));
				364	}
				365
				366	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				367	CmdArgs.push_back(Args.MakeArgString(A));
				368
				369	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				370	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				371	}
				372
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	373	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				374	const InputInfo &Output,
				375	const InputInfoList &Inputs,
				376	const ArgList &Args,
				377	const char *LinkingOutput) const {
				378	const auto &TC =
				379	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				380	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				381
				382	ArgStringList CmdArgs;
				383
				384	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				385	// host binary by the host linker.
				386	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				387	"CUDA toolchain not expected for an OpenMP host device.");
				388
				389	if (Output.isFilename()) {
				390	CmdArgs.push_back("-o");
				391	CmdArgs.push_back(Output.getFilename());
				392	} else
				393	assert(Output.isNothing() && "Invalid output.");
				394	if (Args.hasArg(options::OPT_g_Flag))
				395	CmdArgs.push_back("-g");
				396
				397	if (Args.hasArg(options::OPT_v))
				398	CmdArgs.push_back("-v");
				399
				400	StringRef GPUArch =
				401	Args.getLastArgValue(options::OPT_march_EQ);
				402	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				403
				404	CmdArgs.push_back("-arch");
				405	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				406
				407	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				408	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				409
				410	// Add paths for the default clang library path.
				411	SmallString<256> DefaultLibPath =
				412	llvm::sys::path::parent_path(TC.getDriver().Dir);
				413	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				414	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				415
				416	// Add linking against library implementing OpenMP calls on NVPTX target.
				417	CmdArgs.push_back("-lomptarget-nvptx");
				418
				419	for (const auto &II : Inputs) {
				420	if (II.getType() == types::TY_LLVM_IR \|\|
				421	II.getType() == types::TY_LTO_IR \|\|
				422	II.getType() == types::TY_LTO_BC \|\|
				423	II.getType() == types::TY_LLVM_BC) {
				424	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				425	<< getToolChain().getTripleString();
				426	continue;
				427	}
				428
				429	// Currently, we only pass the input files to the linker, we do not pass
				430	// any libraries that may be valid only for the host.
				431	if (!II.isFilename())
				432	continue;
				433
Gheorghe-Teodor Bercea	5636f4b	2017-09-25 21:25:38 +0000	[diff] [blame]	434	SmallString<256> Name(II.getFilename());
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	435	llvm::sys::path::replace_extension(Name, "cubin");
				436
				437	const char *CubinF =
				438	C.addTempFile(C.getArgs().MakeArgString(Name));
				439
				440	CmdArgs.push_back(CubinF);
				441	}
				442
				443	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				444
				445	const char *Exec =
				446	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				447	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				448	}
				449
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	450	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				451	/// which isn't properly a linker but nonetheless performs the step of stitching
				452	/// together object files from the assembler into a single blob.
				453
				454	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	455	const ToolChain &HostTC, const ArgList &Args,
				456	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	457	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	458	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	459	if (CudaInstallation.isValid())
				460	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	461	// Lookup binaries into the driver directory, this is used to
				462	// discover the clang-offload-bundler executable.
				463	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	464	}
				465
				466	void CudaToolChain::addClangTargetOptions(
				467	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	468	llvm::opt::ArgStringList &CC1Args,
				469	Action::OffloadKind DeviceOffloadingKind) const {
				470	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	471
				472	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				473	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	474	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				475	DeviceOffloadingKind == Action::OFK_Cuda) &&
				476	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				477
				478	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				479	CC1Args.push_back("-fcuda-is-device");
				480
				481	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				482	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				483	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				484
				485	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				486	options::OPT_fno_cuda_approx_transcendentals, false))
				487	CC1Args.push_back("-fcuda-approx-transcendentals");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	488	}
				489
Gheorghe-Teodor Bercea	20789a5	2017-09-25 21:56:32 +0000	[diff] [blame]	490	if (DriverArgs.hasArg(options::OPT_nocudalib))
				491	return;
				492
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	493	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				494
				495	if (LibDeviceFile.empty()) {
Gheorghe-Teodor Bercea	5a3608c	2017-09-26 15:36:20 +0000	[diff] [blame]	496	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
				497	DriverArgs.hasArg(options::OPT_S))
				498	return;
				499
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	500	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				501	return;
				502	}
				503
				504	CC1Args.push_back("-mlink-cuda-bitcode");
				505	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				506
Artem Belevich	4654dc8	2017-09-20 21:23:07 +0000	[diff] [blame]	507	if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
				508	// CUDA-9 uses new instructions that are only available in PTX6.0
				509	CC1Args.push_back("-target-feature");
				510	CC1Args.push_back("+ptx60");
				511	} else {
				512	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				513	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				514	// came with CUDA-7.0.
				515	CC1Args.push_back("-target-feature");
				516	CC1Args.push_back("+ptx42");
				517	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	518	}
				519
				520	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				521	ArgStringList &CC1Args) const {
				522	// Check our CUDA version if we're going to include the CUDA headers.
				523	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				524	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				525	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				526	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				527	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				528	}
				529	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				530	}
				531
				532	llvm::opt::DerivedArgList *
				533	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				534	StringRef BoundArch,
				535	Action::OffloadKind DeviceOffloadKind) const {
				536	DerivedArgList *DAL =
				537	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				538	if (!DAL)
				539	DAL = new DerivedArgList(Args.getBaseArgs());
				540
				541	const OptTable &Opts = getDriver().getOpts();
				542
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	543	// For OpenMP device offloading, append derived arguments. Make sure
				544	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	545	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	546	if (DeviceOffloadKind == Action::OFK_OpenMP) {
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	547	for (Arg *A : Args) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	548	bool IsDuplicate = false;
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	549	for (Arg DALArg : DAL) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	550	if (A == DALArg) {
				551	IsDuplicate = true;
				552	break;
				553	}
				554	}
				555	if (!IsDuplicate)
				556	DAL->append(A);
				557	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	558
				559	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	560	if (Arch.empty())
				561	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
				562	CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	563
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	564	return DAL;
				565	}
				566
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	567	for (Arg *A : Args) {
				568	if (A->getOption().matches(options::OPT_Xarch__)) {
				569	// Skip this argument unless the architecture matches BoundArch
				570	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				571	continue;
				572
				573	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				574	unsigned Prev = Index;
				575	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				576
				577	// If the argument parsing failed or more than one argument was
				578	// consumed, the -Xarch_ argument's parameter tried to consume
				579	// extra arguments. Emit an error and ignore.
				580	//
				581	// We also want to disallow any options which would alter the
				582	// driver behavior; that isn't going to work in our model. We
				583	// use isDriverOption() as an approximation, although things
				584	// like -O4 are going to slip through.
				585	if (!XarchArg \|\| Index > Prev + 1) {
				586	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				587	<< A->getAsString(Args);
				588	continue;
				589	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				590	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				591	<< A->getAsString(Args);
				592	continue;
				593	}
				594	XarchArg->setBaseArg(A);
				595	A = XarchArg.release();
				596	DAL->AddSynthesizedArg(A);
				597	}
				598	DAL->append(A);
				599	}
				600
				601	if (!BoundArch.empty()) {
				602	DAL->eraseArg(options::OPT_march_EQ);
				603	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				604	}
				605	return DAL;
				606	}
				607
				608	Tool *CudaToolChain::buildAssembler() const {
				609	return new tools::NVPTX::Assembler(*this);
				610	}
				611
				612	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	613	if (OK == Action::OFK_OpenMP)
				614	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	615	return new tools::NVPTX::Linker(*this);
				616	}
				617
				618	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				619	HostTC.addClangWarningOptions(CC1Args);
				620	}
				621
				622	ToolChain::CXXStdlibType
				623	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				624	return HostTC.GetCXXStdlibType(Args);
				625	}
				626
				627	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				628	ArgStringList &CC1Args) const {
				629	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				630	}
				631
				632	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				633	ArgStringList &CC1Args) const {
				634	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				635	}
				636
				637	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				638	ArgStringList &CC1Args) const {
				639	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				640	}
				641
				642	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				643	// The CudaToolChain only supports sanitizers in the sense that it allows
				644	// sanitizer arguments on the command line if they are supported by the host
				645	// toolchain. The CudaToolChain will actually ignore any command line
				646	// arguments for any of these "supported" sanitizers. That means that no
				647	// sanitization of device code is actually supported at this time.
				648	//
				649	// This behavior is necessary because the host and device toolchains
				650	// invocations often share the command line, so the device toolchain must
				651	// tolerate flags meant only for the host toolchain.
				652	return HostTC.getSupportedSanitizers();
				653	}
				654
				655	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				656	const ArgList &Args) const {
				657	return HostTC.computeMSVCVersion(D, Args);
				658	}