Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: c83d066fa26e353656ad6e8c037e6db9977894ff [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	11	#include "CommonArgs.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	12	#include "InputInfo.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
				14	#include "clang/Basic/VirtualFileSystem.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	15	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	16	#include "clang/Driver/Compilation.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	17	#include "clang/Driver/Distro.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	18	#include "clang/Driver/Driver.h"
				19	#include "clang/Driver/DriverDiagnostic.h"
				20	#include "clang/Driver/Options.h"
				21	#include "llvm/Option/ArgList.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	22	#include "llvm/Support/FileSystem.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	23	#include "llvm/Support/Path.h"
Gheorghe-Teodor Bercea	148046c	2018-03-13 19:39:19 +0000	[diff] [blame]	24	#include "llvm/Support/Process.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	25	#include "llvm/Support/Program.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	26	#include <system_error>
				27
				28	using namespace clang::driver;
				29	using namespace clang::driver::toolchains;
				30	using namespace clang::driver::tools;
				31	using namespace clang;
				32	using namespace llvm::opt;
				33
				34	// Parses the contents of version.txt in an CUDA installation. It should
				35	// contain one line of the from e.g. "CUDA Version 7.5.2".
				36	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				37	if (!V.startswith("CUDA Version "))
				38	return CudaVersion::UNKNOWN;
				39	V = V.substr(strlen("CUDA Version "));
				40	int Major = -1, Minor = -1;
				41	auto First = V.split('.');
				42	auto Second = First.second.split('.');
				43	if (First.first.getAsInteger(10, Major) \|\|
				44	Second.first.getAsInteger(10, Minor))
				45	return CudaVersion::UNKNOWN;
				46
				47	if (Major == 7 && Minor == 0) {
				48	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				49	// CUDA 7 installs I've seen. But no harm in checking.
				50	return CudaVersion::CUDA_70;
				51	}
				52	if (Major == 7 && Minor == 5)
				53	return CudaVersion::CUDA_75;
				54	if (Major == 8 && Minor == 0)
				55	return CudaVersion::CUDA_80;
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	56	if (Major == 9 && Minor == 0)
				57	return CudaVersion::CUDA_90;
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	58	if (Major == 9 && Minor == 1)
				59	return CudaVersion::CUDA_91;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	60	return CudaVersion::UNKNOWN;
				61	}
				62
				63	CudaInstallationDetector::CudaInstallationDetector(
				64	const Driver &D, const llvm::Triple &HostTriple,
				65	const llvm::opt::ArgList &Args)
				66	: D(D) {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	67	struct Candidate {
				68	std::string Path;
				69	bool StrictChecking;
				70
				71	Candidate(std::string Path, bool StrictChecking = false)
				72	: Path(Path), StrictChecking(StrictChecking) {}
				73	};
				74	SmallVector<Candidate, 4> Candidates;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	75
				76	// In decreasing order so we prefer newer versions to older versions.
				77	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				78
				79	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	80	Candidates.emplace_back(
				81	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	82	} else if (HostTriple.isOSWindows()) {
				83	for (const char *Ver : Versions)
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	84	Candidates.emplace_back(
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	85	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				86	Ver);
				87	} else {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	88	if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
				89	// Try to find ptxas binary. If the executable is located in a directory
				90	// called 'bin/', its parent directory might be a good guess for a valid
				91	// CUDA installation.
				92	// However, some distributions might installs 'ptxas' to /usr/bin. In that
				93	// case the candidate would be '/usr' which passes the following checks
				94	// because '/usr/include' exists as well. To avoid this case, we always
				95	// check for the directory potentially containing files for libdevice,
				96	// even if the user passes -nocudalib.
				97	if (llvm::ErrorOr<std::string> ptxas =
				98	llvm::sys::findProgramByName("ptxas")) {
				99	SmallString<256> ptxasAbsolutePath;
				100	llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
				101
				102	StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
				103	if (llvm::sys::path::filename(ptxasDir) == "bin")
				104	Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
				105	/StrictChecking=/true);
				106	}
				107	}
				108
				109	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	110	for (const char *Ver : Versions)
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	111	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
Sylvestre Ledru	0cfcdc3	2017-11-29 15:03:28 +0000	[diff] [blame]	112
Ismail Donmez	64f99df	2017-11-29 15:18:02 +0000	[diff] [blame]	113	if (Distro(D.getVFS()).IsDebian())
Sylvestre Ledru	0cfcdc3	2017-11-29 15:03:28 +0000	[diff] [blame]	114	// Special case for Debian to have nvidia-cuda-toolkit work
				115	// out of the box. More info on http://bugs.debian.org/882505
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	116	Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	117	}
				118
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	119	bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
				120
				121	for (const auto &Candidate : Candidates) {
				122	InstallPath = Candidate.Path;
				123	if (InstallPath.empty() \|\| !D.getVFS().exists(InstallPath))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	124	continue;
				125
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	126	BinPath = InstallPath + "/bin";
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	127	IncludePath = InstallPath + "/include";
				128	LibDevicePath = InstallPath + "/nvvm/libdevice";
				129
				130	auto &FS = D.getVFS();
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	131	if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	132	continue;
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	133	bool CheckLibDevice = (!NoCudaLib \|\| Candidate.StrictChecking);
				134	if (CheckLibDevice && !FS.exists(LibDevicePath))
				135	continue;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	136
				137	// On Linux, we have both lib and lib64 directories, and we need to choose
				138	// based on our triple. On MacOS, we have only a lib directory.
				139	//
				140	// It's sufficient for our purposes to be flexible: If both lib and lib64
				141	// exist, we choose whichever one matches our triple. Otherwise, if only
				142	// lib exists, we use it.
				143	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				144	LibPath = InstallPath + "/lib64";
				145	else if (FS.exists(InstallPath + "/lib"))
				146	LibPath = InstallPath + "/lib";
				147	else
				148	continue;
				149
				150	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				151	FS.getBufferForFile(InstallPath + "/version.txt");
				152	if (!VersionFile) {
				153	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				154	// version.txt isn't present.
				155	Version = CudaVersion::CUDA_70;
				156	} else {
				157	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				158	}
				159
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	160	if (Version >= CudaVersion::CUDA_90) {
				161	// CUDA-9+ uses single libdevice file for all GPU variants.
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	162	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
				163	if (FS.exists(FilePath)) {
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	164	for (const char *GpuArchName :
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	165	{"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	166	"sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
				167	const CudaArch GpuArch = StringToCudaArch(GpuArchName);
				168	if (Version >= MinVersionForCudaArch(GpuArch) &&
				169	Version <= MaxVersionForCudaArch(GpuArch))
				170	LibDeviceMap[GpuArchName] = FilePath;
				171	}
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	172	}
				173	} else {
				174	std::error_code EC;
				175	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				176	!EC && LI != LE; LI = LI.increment(EC)) {
				177	StringRef FilePath = LI->path();
				178	StringRef FileName = llvm::sys::path::filename(FilePath);
				179	// Process all bitcode filenames that look like
				180	// libdevice.compute_XX.YY.bc
				181	const StringRef LibDeviceName = "libdevice.";
				182	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				183	continue;
				184	StringRef GpuArch = FileName.slice(
				185	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				186	LibDeviceMap[GpuArch] = FilePath.str();
Alexander Kornienko	2a8c18d	2018-04-06 15:14:32 +0000	[diff] [blame]	187	// Insert map entries for specific devices with this compute
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	188	// capability. NVCC's choice of the libdevice library version is
				189	// rather peculiar and depends on the CUDA version.
				190	if (GpuArch == "compute_20") {
				191	LibDeviceMap["sm_20"] = FilePath;
				192	LibDeviceMap["sm_21"] = FilePath;
				193	LibDeviceMap["sm_32"] = FilePath;
				194	} else if (GpuArch == "compute_30") {
				195	LibDeviceMap["sm_30"] = FilePath;
				196	if (Version < CudaVersion::CUDA_80) {
				197	LibDeviceMap["sm_50"] = FilePath;
				198	LibDeviceMap["sm_52"] = FilePath;
				199	LibDeviceMap["sm_53"] = FilePath;
				200	}
				201	LibDeviceMap["sm_60"] = FilePath;
				202	LibDeviceMap["sm_61"] = FilePath;
				203	LibDeviceMap["sm_62"] = FilePath;
				204	} else if (GpuArch == "compute_35") {
				205	LibDeviceMap["sm_35"] = FilePath;
				206	LibDeviceMap["sm_37"] = FilePath;
				207	} else if (GpuArch == "compute_50") {
				208	if (Version >= CudaVersion::CUDA_80) {
				209	LibDeviceMap["sm_50"] = FilePath;
				210	LibDeviceMap["sm_52"] = FilePath;
				211	LibDeviceMap["sm_53"] = FilePath;
				212	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	213	}
				214	}
				215	}
				216
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	217	// Check that we have found at least one libdevice that we can link in if
				218	// -nocudalib hasn't been specified.
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	219	if (LibDeviceMap.empty() && !NoCudaLib)
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame]	220	continue;
				221
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	222	IsValid = true;
				223	break;
				224	}
				225	}
				226
				227	void CudaInstallationDetector::AddCudaIncludeArgs(
				228	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				229	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				230	// Add cuda_wrappers/* to our system include path. This lets us wrap
				231	// standard library headers.
				232	SmallString<128> P(D.ResourceDir);
				233	llvm::sys::path::append(P, "include");
				234	llvm::sys::path::append(P, "cuda_wrappers");
				235	CC1Args.push_back("-internal-isystem");
				236	CC1Args.push_back(DriverArgs.MakeArgString(P));
				237	}
				238
				239	if (DriverArgs.hasArg(options::OPT_nocudainc))
				240	return;
				241
				242	if (!isValid()) {
				243	D.Diag(diag::err_drv_no_cuda_installation);
				244	return;
				245	}
				246
				247	CC1Args.push_back("-internal-isystem");
				248	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				249	CC1Args.push_back("-include");
				250	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				251	}
				252
				253	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				254	CudaArch Arch) const {
				255	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	256	ArchsWithBadVersion.count(Arch) > 0)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	257	return;
				258
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	259	auto MinVersion = MinVersionForCudaArch(Arch);
				260	auto MaxVersion = MaxVersionForCudaArch(Arch);
				261	if (Version < MinVersion \|\| Version > MaxVersion) {
				262	ArchsWithBadVersion.insert(Arch);
				263	D.Diag(diag::err_drv_cuda_version_unsupported)
				264	<< CudaArchToString(Arch) << CudaVersionToString(MinVersion)
				265	<< CudaVersionToString(MaxVersion) << InstallPath
				266	<< CudaVersionToString(Version);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	267	}
				268	}
				269
				270	void CudaInstallationDetector::print(raw_ostream &OS) const {
				271	if (isValid())
				272	OS << "Found CUDA installation: " << InstallPath << ", version "
				273	<< CudaVersionToString(Version) << "\n";
				274	}
				275
				276	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				277	const InputInfo &Output,
				278	const InputInfoList &Inputs,
				279	const ArgList &Args,
				280	const char *LinkingOutput) const {
				281	const auto &TC =
				282	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				283	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				284
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	285	StringRef GPUArchName;
				286	// If this is an OpenMP action we need to extract the device architecture
				287	// from the -march=arch option. This option may come from -Xopenmp-target
				288	// flag or the default value.
				289	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				290	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				291	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				292	} else
				293	GPUArchName = JA.getOffloadingArch();
				294
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	295	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	296	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	297	assert(gpu_arch != CudaArch::UNKNOWN &&
				298	"Device action expected to have an architecture.");
				299
				300	// Check that our installation's ptxas supports gpu_arch.
				301	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				302	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				303	}
				304
				305	ArgStringList CmdArgs;
				306	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				307	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				308	options::OPT_no_cuda_noopt_device_debug, false)) {
				309	// ptxas does not accept -g option if optimization is enabled, so
				310	// we ignore the compiler's -O* options if we want debug info.
				311	CmdArgs.push_back("-g");
				312	CmdArgs.push_back("--dont-merge-basicblocks");
				313	CmdArgs.push_back("--return-at-end");
				314	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				315	// Map the -O we received to -O{0,1,2,3}.
				316	//
				317	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				318	// default, so it may correspond more closely to the spirit of clang -O2.
				319
				320	// -O3 seems like the least-bad option when -Osomething is specified to
				321	// clang but it isn't handled below.
				322	StringRef OOpt = "3";
				323	if (A->getOption().matches(options::OPT_O4) \|\|
				324	A->getOption().matches(options::OPT_Ofast))
				325	OOpt = "3";
				326	else if (A->getOption().matches(options::OPT_O0))
				327	OOpt = "0";
				328	else if (A->getOption().matches(options::OPT_O)) {
				329	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				330	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				331	.Case("1", "1")
				332	.Case("2", "2")
				333	.Case("3", "3")
				334	.Case("s", "2")
				335	.Case("z", "2")
				336	.Default("2");
				337	}
				338	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				339	} else {
				340	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				341	// to no optimizations, but ptxas's default is -O3.
				342	CmdArgs.push_back("-O0");
				343	}
				344
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	345	// Pass -v to ptxas if it was passed to the driver.
				346	if (Args.hasArg(options::OPT_v))
				347	CmdArgs.push_back("-v");
				348
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	349	CmdArgs.push_back("--gpu-name");
				350	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				351	CmdArgs.push_back("--output-file");
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	352	CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	353	for (const auto& II : Inputs)
				354	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				355
				356	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				357	CmdArgs.push_back(Args.MakeArgString(A));
				358
Jonas Hahnfeld	5379c6d	2018-02-12 10:46:45 +0000	[diff] [blame]	359	bool Relocatable = false;
				360	if (JA.isOffloading(Action::OFK_OpenMP))
				361	// In OpenMP we need to generate relocatable code.
				362	Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				363	options::OPT_fnoopenmp_relocatable_target,
				364	/Default=/true);
				365	else if (JA.isOffloading(Action::OFK_Cuda))
				366	Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
				367	options::OPT_fno_cuda_rdc, /Default=/false);
				368
				369	if (Relocatable)
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	370	CmdArgs.push_back("-c");
				371
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	372	const char *Exec;
				373	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				374	Exec = A->getValue();
				375	else
				376	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				377	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				378	}
				379
Artem Belevich	dde3dc2	2018-04-10 18:38:22 +0000	[diff] [blame^]	380	static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
				381	bool includePTX = true;
				382	for (Arg *A : Args) {
				383	if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) \|\|
				384	A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
				385	continue;
				386	A->claim();
				387	const StringRef ArchStr = A->getValue();
				388	if (ArchStr == "all" \|\| ArchStr == gpu_arch) {
				389	includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
				390	continue;
				391	}
				392	}
				393	return includePTX;
				394	}
				395
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	396	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				397	// at the Inputs' Actions in order to figure out which GPU architecture they
				398	// correspond to.
				399	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				400	const InputInfo &Output,
				401	const InputInfoList &Inputs,
				402	const ArgList &Args,
				403	const char *LinkingOutput) const {
				404	const auto &TC =
				405	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				406	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				407
				408	ArgStringList CmdArgs;
				409	CmdArgs.push_back("--cuda");
				410	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				411	CmdArgs.push_back(Args.MakeArgString("--create"));
				412	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				413
				414	for (const auto& II : Inputs) {
				415	auto *A = II.getAction();
				416	assert(A->getInputs().size() == 1 &&
				417	"Device offload action is expected to have a single input");
				418	const char *gpu_arch_str = A->getOffloadingArch();
				419	assert(gpu_arch_str &&
				420	"Device action expected to have associated a GPU architecture!");
				421	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				422
Artem Belevich	dde3dc2	2018-04-10 18:38:22 +0000	[diff] [blame^]	423	if (II.getType() == types::TY_PP_Asm &&
				424	!shouldIncludePTX(Args, gpu_arch_str))
				425	continue;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	426	// We need to pass an Arch of the form "sm_XX" for cubin files and
				427	// "compute_XX" for ptx.
				428	const char *Arch =
				429	(II.getType() == types::TY_PP_Asm)
				430	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				431	: gpu_arch_str;
				432	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				433	Arch + ",file=" + II.getFilename()));
				434	}
				435
				436	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				437	CmdArgs.push_back(Args.MakeArgString(A));
				438
				439	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				440	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				441	}
				442
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	443	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				444	const InputInfo &Output,
				445	const InputInfoList &Inputs,
				446	const ArgList &Args,
				447	const char *LinkingOutput) const {
				448	const auto &TC =
				449	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				450	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				451
				452	ArgStringList CmdArgs;
				453
				454	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				455	// host binary by the host linker.
				456	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				457	"CUDA toolchain not expected for an OpenMP host device.");
				458
				459	if (Output.isFilename()) {
				460	CmdArgs.push_back("-o");
				461	CmdArgs.push_back(Output.getFilename());
				462	} else
				463	assert(Output.isNothing() && "Invalid output.");
				464	if (Args.hasArg(options::OPT_g_Flag))
				465	CmdArgs.push_back("-g");
				466
				467	if (Args.hasArg(options::OPT_v))
				468	CmdArgs.push_back("-v");
				469
				470	StringRef GPUArch =
				471	Args.getLastArgValue(options::OPT_march_EQ);
				472	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				473
				474	CmdArgs.push_back("-arch");
				475	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				476
				477	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				478	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				479
				480	// Add paths for the default clang library path.
				481	SmallString<256> DefaultLibPath =
				482	llvm::sys::path::parent_path(TC.getDriver().Dir);
				483	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				484	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				485
				486	// Add linking against library implementing OpenMP calls on NVPTX target.
				487	CmdArgs.push_back("-lomptarget-nvptx");
				488
				489	for (const auto &II : Inputs) {
				490	if (II.getType() == types::TY_LLVM_IR \|\|
				491	II.getType() == types::TY_LTO_IR \|\|
				492	II.getType() == types::TY_LTO_BC \|\|
				493	II.getType() == types::TY_LLVM_BC) {
				494	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				495	<< getToolChain().getTripleString();
				496	continue;
				497	}
				498
				499	// Currently, we only pass the input files to the linker, we do not pass
				500	// any libraries that may be valid only for the host.
				501	if (!II.isFilename())
				502	continue;
				503
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	504	const char *CubinF = C.addTempFile(
				505	C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	506
				507	CmdArgs.push_back(CubinF);
				508	}
				509
				510	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				511
				512	const char *Exec =
				513	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				514	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				515	}
				516
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	517	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				518	/// which isn't properly a linker but nonetheless performs the step of stitching
				519	/// together object files from the assembler into a single blob.
				520
				521	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	522	const ToolChain &HostTC, const ArgList &Args,
				523	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	524	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	525	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	526	if (CudaInstallation.isValid())
				527	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	528	// Lookup binaries into the driver directory, this is used to
				529	// discover the clang-offload-bundler executable.
				530	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	531	}
				532
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	533	std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
				534	// Only object files are changed, for example assembly files keep their .s
				535	// extensions. CUDA also continues to use .o as they don't use nvlink but
				536	// fatbinary.
				537	if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
				538	return ToolChain::getInputFilename(Input);
				539
				540	// Replace extension for object files with cubin because nvlink relies on
				541	// these particular file names.
				542	SmallString<256> Filename(ToolChain::getInputFilename(Input));
				543	llvm::sys::path::replace_extension(Filename, "cubin");
				544	return Filename.str();
				545	}
				546
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	547	void CudaToolChain::addClangTargetOptions(
				548	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	549	llvm::opt::ArgStringList &CC1Args,
				550	Action::OffloadKind DeviceOffloadingKind) const {
				551	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	552
				553	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				554	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	555	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				556	DeviceOffloadingKind == Action::OFK_Cuda) &&
				557	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				558
				559	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				560	CC1Args.push_back("-fcuda-is-device");
				561
				562	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				563	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				564	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				565
				566	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				567	options::OPT_fno_cuda_approx_transcendentals, false))
				568	CC1Args.push_back("-fcuda-approx-transcendentals");
Jonas Hahnfeld	5379c6d	2018-02-12 10:46:45 +0000	[diff] [blame]	569
				570	if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
				571	false))
				572	CC1Args.push_back("-fcuda-rdc");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	573	}
				574
Gheorghe-Teodor Bercea	20789a5	2017-09-25 21:56:32 +0000	[diff] [blame]	575	if (DriverArgs.hasArg(options::OPT_nocudalib))
				576	return;
				577
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	578	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				579
				580	if (LibDeviceFile.empty()) {
Gheorghe-Teodor Bercea	5a3608c	2017-09-26 15:36:20 +0000	[diff] [blame]	581	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
				582	DriverArgs.hasArg(options::OPT_S))
				583	return;
				584
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	585	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				586	return;
				587	}
				588
				589	CC1Args.push_back("-mlink-cuda-bitcode");
				590	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				591
Artem Belevich	4654dc8	2017-09-20 21:23:07 +0000	[diff] [blame]	592	if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
				593	// CUDA-9 uses new instructions that are only available in PTX6.0
				594	CC1Args.push_back("-target-feature");
				595	CC1Args.push_back("+ptx60");
				596	} else {
				597	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				598	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				599	// came with CUDA-7.0.
				600	CC1Args.push_back("-target-feature");
				601	CC1Args.push_back("+ptx42");
				602	}
Gheorghe-Teodor Bercea	0d5aa84	2018-03-13 23:19:52 +0000	[diff] [blame]	603
				604	if (DeviceOffloadingKind == Action::OFK_OpenMP) {
				605	SmallVector<StringRef, 8> LibraryPaths;
				606	// Add path to lib and/or lib64 folders.
				607	SmallString<256> DefaultLibPath =
				608	llvm::sys::path::parent_path(getDriver().Dir);
				609	llvm::sys::path::append(DefaultLibPath,
				610	Twine("lib") + CLANG_LIBDIR_SUFFIX);
				611	LibraryPaths.emplace_back(DefaultLibPath.c_str());
				612
				613	// Add user defined library paths from LIBRARY_PATH.
				614	llvm::Optional<std::string> LibPath =
				615	llvm::sys::Process::GetEnv("LIBRARY_PATH");
				616	if (LibPath) {
				617	SmallVector<StringRef, 8> Frags;
				618	const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
				619	llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
				620	for (StringRef Path : Frags)
				621	LibraryPaths.emplace_back(Path.trim());
				622	}
				623
				624	std::string LibOmpTargetName =
				625	"libomptarget-nvptx-" + GpuArch.str() + ".bc";
				626	bool FoundBCLibrary = false;
				627	for (StringRef LibraryPath : LibraryPaths) {
				628	SmallString<128> LibOmpTargetFile(LibraryPath);
				629	llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
				630	if (llvm::sys::fs::exists(LibOmpTargetFile)) {
				631	CC1Args.push_back("-mlink-cuda-bitcode");
				632	CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
				633	FoundBCLibrary = true;
				634	break;
				635	}
				636	}
				637	if (!FoundBCLibrary)
				638	getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
				639	<< LibOmpTargetName;
				640	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	641	}
				642
				643	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				644	ArgStringList &CC1Args) const {
				645	// Check our CUDA version if we're going to include the CUDA headers.
				646	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				647	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				648	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				649	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				650	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				651	}
				652	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				653	}
				654
				655	llvm::opt::DerivedArgList *
				656	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				657	StringRef BoundArch,
				658	Action::OffloadKind DeviceOffloadKind) const {
				659	DerivedArgList *DAL =
				660	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				661	if (!DAL)
				662	DAL = new DerivedArgList(Args.getBaseArgs());
				663
				664	const OptTable &Opts = getDriver().getOpts();
				665
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	666	// For OpenMP device offloading, append derived arguments. Make sure
				667	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	668	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	669	if (DeviceOffloadKind == Action::OFK_OpenMP) {
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	670	for (Arg *A : Args) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	671	bool IsDuplicate = false;
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	672	for (Arg DALArg : DAL) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	673	if (A == DALArg) {
				674	IsDuplicate = true;
				675	break;
				676	}
				677	}
				678	if (!IsDuplicate)
				679	DAL->append(A);
				680	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	681
				682	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	683	if (Arch.empty())
				684	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
				685	CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	686
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	687	return DAL;
				688	}
				689
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	690	for (Arg *A : Args) {
				691	if (A->getOption().matches(options::OPT_Xarch__)) {
				692	// Skip this argument unless the architecture matches BoundArch
				693	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				694	continue;
				695
				696	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				697	unsigned Prev = Index;
				698	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				699
				700	// If the argument parsing failed or more than one argument was
				701	// consumed, the -Xarch_ argument's parameter tried to consume
				702	// extra arguments. Emit an error and ignore.
				703	//
				704	// We also want to disallow any options which would alter the
				705	// driver behavior; that isn't going to work in our model. We
				706	// use isDriverOption() as an approximation, although things
				707	// like -O4 are going to slip through.
				708	if (!XarchArg \|\| Index > Prev + 1) {
				709	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				710	<< A->getAsString(Args);
				711	continue;
				712	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				713	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				714	<< A->getAsString(Args);
				715	continue;
				716	}
				717	XarchArg->setBaseArg(A);
				718	A = XarchArg.release();
				719	DAL->AddSynthesizedArg(A);
				720	}
				721	DAL->append(A);
				722	}
				723
				724	if (!BoundArch.empty()) {
				725	DAL->eraseArg(options::OPT_march_EQ);
				726	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				727	}
				728	return DAL;
				729	}
				730
				731	Tool *CudaToolChain::buildAssembler() const {
				732	return new tools::NVPTX::Assembler(*this);
				733	}
				734
				735	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	736	if (OK == Action::OFK_OpenMP)
				737	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	738	return new tools::NVPTX::Linker(*this);
				739	}
				740
				741	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				742	HostTC.addClangWarningOptions(CC1Args);
				743	}
				744
				745	ToolChain::CXXStdlibType
				746	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				747	return HostTC.GetCXXStdlibType(Args);
				748	}
				749
				750	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				751	ArgStringList &CC1Args) const {
				752	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				753	}
				754
				755	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				756	ArgStringList &CC1Args) const {
				757	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				758	}
				759
				760	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				761	ArgStringList &CC1Args) const {
				762	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				763	}
				764
				765	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				766	// The CudaToolChain only supports sanitizers in the sense that it allows
				767	// sanitizer arguments on the command line if they are supported by the host
				768	// toolchain. The CudaToolChain will actually ignore any command line
				769	// arguments for any of these "supported" sanitizers. That means that no
				770	// sanitization of device code is actually supported at this time.
				771	//
				772	// This behavior is necessary because the host and device toolchains
				773	// invocations often share the command line, so the device toolchain must
				774	// tolerate flags meant only for the host toolchain.
				775	return HostTC.getSupportedSanitizers();
				776	}
				777
				778	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				779	const ArgList &Args) const {
				780	return HostTC.computeMSVCVersion(D, Args);
				781	}