Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: 64e62cb3e0e2a79a49c977232829b0363db11ce9 [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	11	#include "CommonArgs.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	12	#include "InputInfo.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
				14	#include "clang/Basic/VirtualFileSystem.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	15	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	16	#include "clang/Driver/Compilation.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	17	#include "clang/Driver/Distro.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	18	#include "clang/Driver/Driver.h"
				19	#include "clang/Driver/DriverDiagnostic.h"
				20	#include "clang/Driver/Options.h"
				21	#include "llvm/Option/ArgList.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	22	#include "llvm/Support/FileSystem.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	23	#include "llvm/Support/Path.h"
Gheorghe-Teodor Bercea	148046c	2018-03-13 19:39:19 +0000	[diff] [blame]	24	#include "llvm/Support/Process.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	25	#include "llvm/Support/Program.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	26	#include <system_error>
				27
				28	using namespace clang::driver;
				29	using namespace clang::driver::toolchains;
				30	using namespace clang::driver::tools;
				31	using namespace clang;
				32	using namespace llvm::opt;
				33
				34	// Parses the contents of version.txt in an CUDA installation. It should
				35	// contain one line of the from e.g. "CUDA Version 7.5.2".
				36	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				37	if (!V.startswith("CUDA Version "))
				38	return CudaVersion::UNKNOWN;
				39	V = V.substr(strlen("CUDA Version "));
				40	int Major = -1, Minor = -1;
				41	auto First = V.split('.');
				42	auto Second = First.second.split('.');
				43	if (First.first.getAsInteger(10, Major) \|\|
				44	Second.first.getAsInteger(10, Minor))
				45	return CudaVersion::UNKNOWN;
				46
				47	if (Major == 7 && Minor == 0) {
				48	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				49	// CUDA 7 installs I've seen. But no harm in checking.
				50	return CudaVersion::CUDA_70;
				51	}
				52	if (Major == 7 && Minor == 5)
				53	return CudaVersion::CUDA_75;
				54	if (Major == 8 && Minor == 0)
				55	return CudaVersion::CUDA_80;
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	56	if (Major == 9 && Minor == 0)
				57	return CudaVersion::CUDA_90;
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	58	if (Major == 9 && Minor == 1)
				59	return CudaVersion::CUDA_91;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	60	return CudaVersion::UNKNOWN;
				61	}
				62
				63	CudaInstallationDetector::CudaInstallationDetector(
				64	const Driver &D, const llvm::Triple &HostTriple,
				65	const llvm::opt::ArgList &Args)
				66	: D(D) {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	67	struct Candidate {
				68	std::string Path;
				69	bool StrictChecking;
				70
				71	Candidate(std::string Path, bool StrictChecking = false)
				72	: Path(Path), StrictChecking(StrictChecking) {}
				73	};
				74	SmallVector<Candidate, 4> Candidates;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	75
				76	// In decreasing order so we prefer newer versions to older versions.
				77	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				78
				79	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	80	Candidates.emplace_back(
				81	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	82	} else if (HostTriple.isOSWindows()) {
				83	for (const char *Ver : Versions)
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	84	Candidates.emplace_back(
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	85	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				86	Ver);
				87	} else {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	88	if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
				89	// Try to find ptxas binary. If the executable is located in a directory
				90	// called 'bin/', its parent directory might be a good guess for a valid
				91	// CUDA installation.
				92	// However, some distributions might installs 'ptxas' to /usr/bin. In that
				93	// case the candidate would be '/usr' which passes the following checks
				94	// because '/usr/include' exists as well. To avoid this case, we always
				95	// check for the directory potentially containing files for libdevice,
				96	// even if the user passes -nocudalib.
				97	if (llvm::ErrorOr<std::string> ptxas =
				98	llvm::sys::findProgramByName("ptxas")) {
				99	SmallString<256> ptxasAbsolutePath;
				100	llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
				101
				102	StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
				103	if (llvm::sys::path::filename(ptxasDir) == "bin")
				104	Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
				105	/StrictChecking=/true);
				106	}
				107	}
				108
				109	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	110	for (const char *Ver : Versions)
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	111	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
Sylvestre Ledru	0cfcdc3	2017-11-29 15:03:28 +0000	[diff] [blame]	112
Ismail Donmez	64f99df	2017-11-29 15:18:02 +0000	[diff] [blame]	113	if (Distro(D.getVFS()).IsDebian())
Sylvestre Ledru	0cfcdc3	2017-11-29 15:03:28 +0000	[diff] [blame]	114	// Special case for Debian to have nvidia-cuda-toolkit work
				115	// out of the box. More info on http://bugs.debian.org/882505
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	116	Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	117	}
				118
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	119	bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
				120
				121	for (const auto &Candidate : Candidates) {
				122	InstallPath = Candidate.Path;
				123	if (InstallPath.empty() \|\| !D.getVFS().exists(InstallPath))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	124	continue;
				125
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	126	BinPath = InstallPath + "/bin";
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	127	IncludePath = InstallPath + "/include";
				128	LibDevicePath = InstallPath + "/nvvm/libdevice";
				129
				130	auto &FS = D.getVFS();
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	131	if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	132	continue;
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	133	bool CheckLibDevice = (!NoCudaLib \|\| Candidate.StrictChecking);
				134	if (CheckLibDevice && !FS.exists(LibDevicePath))
				135	continue;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	136
				137	// On Linux, we have both lib and lib64 directories, and we need to choose
				138	// based on our triple. On MacOS, we have only a lib directory.
				139	//
				140	// It's sufficient for our purposes to be flexible: If both lib and lib64
				141	// exist, we choose whichever one matches our triple. Otherwise, if only
				142	// lib exists, we use it.
				143	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				144	LibPath = InstallPath + "/lib64";
				145	else if (FS.exists(InstallPath + "/lib"))
				146	LibPath = InstallPath + "/lib";
				147	else
				148	continue;
				149
				150	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				151	FS.getBufferForFile(InstallPath + "/version.txt");
				152	if (!VersionFile) {
				153	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				154	// version.txt isn't present.
				155	Version = CudaVersion::CUDA_70;
				156	} else {
				157	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				158	}
				159
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	160	if (Version >= CudaVersion::CUDA_90) {
				161	// CUDA-9+ uses single libdevice file for all GPU variants.
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	162	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
				163	if (FS.exists(FilePath)) {
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	164	for (const char *GpuArchName :
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	165	{"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	166	"sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
				167	const CudaArch GpuArch = StringToCudaArch(GpuArchName);
				168	if (Version >= MinVersionForCudaArch(GpuArch) &&
				169	Version <= MaxVersionForCudaArch(GpuArch))
				170	LibDeviceMap[GpuArchName] = FilePath;
				171	}
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	172	}
				173	} else {
				174	std::error_code EC;
				175	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				176	!EC && LI != LE; LI = LI.increment(EC)) {
				177	StringRef FilePath = LI->path();
				178	StringRef FileName = llvm::sys::path::filename(FilePath);
				179	// Process all bitcode filenames that look like
				180	// libdevice.compute_XX.YY.bc
				181	const StringRef LibDeviceName = "libdevice.";
				182	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				183	continue;
				184	StringRef GpuArch = FileName.slice(
				185	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				186	LibDeviceMap[GpuArch] = FilePath.str();
				187	// Insert map entries for specifc devices with this compute
				188	// capability. NVCC's choice of the libdevice library version is
				189	// rather peculiar and depends on the CUDA version.
				190	if (GpuArch == "compute_20") {
				191	LibDeviceMap["sm_20"] = FilePath;
				192	LibDeviceMap["sm_21"] = FilePath;
				193	LibDeviceMap["sm_32"] = FilePath;
				194	} else if (GpuArch == "compute_30") {
				195	LibDeviceMap["sm_30"] = FilePath;
				196	if (Version < CudaVersion::CUDA_80) {
				197	LibDeviceMap["sm_50"] = FilePath;
				198	LibDeviceMap["sm_52"] = FilePath;
				199	LibDeviceMap["sm_53"] = FilePath;
				200	}
				201	LibDeviceMap["sm_60"] = FilePath;
				202	LibDeviceMap["sm_61"] = FilePath;
				203	LibDeviceMap["sm_62"] = FilePath;
				204	} else if (GpuArch == "compute_35") {
				205	LibDeviceMap["sm_35"] = FilePath;
				206	LibDeviceMap["sm_37"] = FilePath;
				207	} else if (GpuArch == "compute_50") {
				208	if (Version >= CudaVersion::CUDA_80) {
				209	LibDeviceMap["sm_50"] = FilePath;
				210	LibDeviceMap["sm_52"] = FilePath;
				211	LibDeviceMap["sm_53"] = FilePath;
				212	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	213	}
				214	}
				215	}
				216
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	217	// Check that we have found at least one libdevice that we can link in if
				218	// -nocudalib hasn't been specified.
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	219	if (LibDeviceMap.empty() && !NoCudaLib)
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame]	220	continue;
				221
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	222	IsValid = true;
				223	break;
				224	}
				225	}
				226
				227	void CudaInstallationDetector::AddCudaIncludeArgs(
				228	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				229	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				230	// Add cuda_wrappers/* to our system include path. This lets us wrap
				231	// standard library headers.
				232	SmallString<128> P(D.ResourceDir);
				233	llvm::sys::path::append(P, "include");
				234	llvm::sys::path::append(P, "cuda_wrappers");
				235	CC1Args.push_back("-internal-isystem");
				236	CC1Args.push_back(DriverArgs.MakeArgString(P));
				237	}
				238
				239	if (DriverArgs.hasArg(options::OPT_nocudainc))
				240	return;
				241
				242	if (!isValid()) {
				243	D.Diag(diag::err_drv_no_cuda_installation);
				244	return;
				245	}
				246
				247	CC1Args.push_back("-internal-isystem");
				248	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				249	CC1Args.push_back("-include");
				250	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				251	}
				252
				253	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				254	CudaArch Arch) const {
				255	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	256	ArchsWithBadVersion.count(Arch) > 0)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	257	return;
				258
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	259	auto MinVersion = MinVersionForCudaArch(Arch);
				260	auto MaxVersion = MaxVersionForCudaArch(Arch);
				261	if (Version < MinVersion \|\| Version > MaxVersion) {
				262	ArchsWithBadVersion.insert(Arch);
				263	D.Diag(diag::err_drv_cuda_version_unsupported)
				264	<< CudaArchToString(Arch) << CudaVersionToString(MinVersion)
				265	<< CudaVersionToString(MaxVersion) << InstallPath
				266	<< CudaVersionToString(Version);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	267	}
				268	}
				269
				270	void CudaInstallationDetector::print(raw_ostream &OS) const {
				271	if (isValid())
				272	OS << "Found CUDA installation: " << InstallPath << ", version "
				273	<< CudaVersionToString(Version) << "\n";
				274	}
				275
				276	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				277	const InputInfo &Output,
				278	const InputInfoList &Inputs,
				279	const ArgList &Args,
				280	const char *LinkingOutput) const {
				281	const auto &TC =
				282	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				283	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				284
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	285	StringRef GPUArchName;
				286	// If this is an OpenMP action we need to extract the device architecture
				287	// from the -march=arch option. This option may come from -Xopenmp-target
				288	// flag or the default value.
				289	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				290	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				291	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				292	} else
				293	GPUArchName = JA.getOffloadingArch();
				294
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	295	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	296	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	297	assert(gpu_arch != CudaArch::UNKNOWN &&
				298	"Device action expected to have an architecture.");
				299
				300	// Check that our installation's ptxas supports gpu_arch.
				301	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				302	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				303	}
				304
				305	ArgStringList CmdArgs;
				306	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				307	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				308	options::OPT_no_cuda_noopt_device_debug, false)) {
				309	// ptxas does not accept -g option if optimization is enabled, so
				310	// we ignore the compiler's -O* options if we want debug info.
				311	CmdArgs.push_back("-g");
				312	CmdArgs.push_back("--dont-merge-basicblocks");
				313	CmdArgs.push_back("--return-at-end");
				314	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				315	// Map the -O we received to -O{0,1,2,3}.
				316	//
				317	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				318	// default, so it may correspond more closely to the spirit of clang -O2.
				319
				320	// -O3 seems like the least-bad option when -Osomething is specified to
				321	// clang but it isn't handled below.
				322	StringRef OOpt = "3";
				323	if (A->getOption().matches(options::OPT_O4) \|\|
				324	A->getOption().matches(options::OPT_Ofast))
				325	OOpt = "3";
				326	else if (A->getOption().matches(options::OPT_O0))
				327	OOpt = "0";
				328	else if (A->getOption().matches(options::OPT_O)) {
				329	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				330	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				331	.Case("1", "1")
				332	.Case("2", "2")
				333	.Case("3", "3")
				334	.Case("s", "2")
				335	.Case("z", "2")
				336	.Default("2");
				337	}
				338	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				339	} else {
				340	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				341	// to no optimizations, but ptxas's default is -O3.
				342	CmdArgs.push_back("-O0");
				343	}
				344
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	345	// Pass -v to ptxas if it was passed to the driver.
				346	if (Args.hasArg(options::OPT_v))
				347	CmdArgs.push_back("-v");
				348
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	349	CmdArgs.push_back("--gpu-name");
				350	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				351	CmdArgs.push_back("--output-file");
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	352	CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	353	for (const auto& II : Inputs)
				354	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				355
				356	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				357	CmdArgs.push_back(Args.MakeArgString(A));
				358
Jonas Hahnfeld	5379c6d	2018-02-12 10:46:45 +0000	[diff] [blame]	359	bool Relocatable = false;
				360	if (JA.isOffloading(Action::OFK_OpenMP))
				361	// In OpenMP we need to generate relocatable code.
				362	Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				363	options::OPT_fnoopenmp_relocatable_target,
				364	/Default=/true);
				365	else if (JA.isOffloading(Action::OFK_Cuda))
				366	Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
				367	options::OPT_fno_cuda_rdc, /Default=/false);
				368
				369	if (Relocatable)
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	370	CmdArgs.push_back("-c");
				371
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	372	const char *Exec;
				373	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				374	Exec = A->getValue();
				375	else
				376	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				377	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				378	}
				379
				380	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				381	// at the Inputs' Actions in order to figure out which GPU architecture they
				382	// correspond to.
				383	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				384	const InputInfo &Output,
				385	const InputInfoList &Inputs,
				386	const ArgList &Args,
				387	const char *LinkingOutput) const {
				388	const auto &TC =
				389	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				390	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				391
				392	ArgStringList CmdArgs;
				393	CmdArgs.push_back("--cuda");
				394	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				395	CmdArgs.push_back(Args.MakeArgString("--create"));
				396	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				397
				398	for (const auto& II : Inputs) {
				399	auto *A = II.getAction();
				400	assert(A->getInputs().size() == 1 &&
				401	"Device offload action is expected to have a single input");
				402	const char *gpu_arch_str = A->getOffloadingArch();
				403	assert(gpu_arch_str &&
				404	"Device action expected to have associated a GPU architecture!");
				405	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				406
				407	// We need to pass an Arch of the form "sm_XX" for cubin files and
				408	// "compute_XX" for ptx.
				409	const char *Arch =
				410	(II.getType() == types::TY_PP_Asm)
				411	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				412	: gpu_arch_str;
				413	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				414	Arch + ",file=" + II.getFilename()));
				415	}
				416
				417	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				418	CmdArgs.push_back(Args.MakeArgString(A));
				419
				420	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				421	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				422	}
				423
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	424	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				425	const InputInfo &Output,
				426	const InputInfoList &Inputs,
				427	const ArgList &Args,
				428	const char *LinkingOutput) const {
				429	const auto &TC =
				430	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				431	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				432
				433	ArgStringList CmdArgs;
				434
				435	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				436	// host binary by the host linker.
				437	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				438	"CUDA toolchain not expected for an OpenMP host device.");
				439
				440	if (Output.isFilename()) {
				441	CmdArgs.push_back("-o");
				442	CmdArgs.push_back(Output.getFilename());
				443	} else
				444	assert(Output.isNothing() && "Invalid output.");
				445	if (Args.hasArg(options::OPT_g_Flag))
				446	CmdArgs.push_back("-g");
				447
				448	if (Args.hasArg(options::OPT_v))
				449	CmdArgs.push_back("-v");
				450
				451	StringRef GPUArch =
				452	Args.getLastArgValue(options::OPT_march_EQ);
				453	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				454
				455	CmdArgs.push_back("-arch");
				456	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				457
				458	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				459	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				460
				461	// Add paths for the default clang library path.
				462	SmallString<256> DefaultLibPath =
				463	llvm::sys::path::parent_path(TC.getDriver().Dir);
				464	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				465	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				466
				467	// Add linking against library implementing OpenMP calls on NVPTX target.
				468	CmdArgs.push_back("-lomptarget-nvptx");
				469
				470	for (const auto &II : Inputs) {
				471	if (II.getType() == types::TY_LLVM_IR \|\|
				472	II.getType() == types::TY_LTO_IR \|\|
				473	II.getType() == types::TY_LTO_BC \|\|
				474	II.getType() == types::TY_LLVM_BC) {
				475	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				476	<< getToolChain().getTripleString();
				477	continue;
				478	}
				479
				480	// Currently, we only pass the input files to the linker, we do not pass
				481	// any libraries that may be valid only for the host.
				482	if (!II.isFilename())
				483	continue;
				484
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	485	const char *CubinF = C.addTempFile(
				486	C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	487
				488	CmdArgs.push_back(CubinF);
				489	}
				490
				491	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				492
				493	const char *Exec =
				494	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				495	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				496	}
				497
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	498	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				499	/// which isn't properly a linker but nonetheless performs the step of stitching
				500	/// together object files from the assembler into a single blob.
				501
				502	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	503	const ToolChain &HostTC, const ArgList &Args,
				504	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	505	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	506	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	507	if (CudaInstallation.isValid())
				508	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	509	// Lookup binaries into the driver directory, this is used to
				510	// discover the clang-offload-bundler executable.
				511	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	512	}
				513
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	514	std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
				515	// Only object files are changed, for example assembly files keep their .s
				516	// extensions. CUDA also continues to use .o as they don't use nvlink but
				517	// fatbinary.
				518	if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
				519	return ToolChain::getInputFilename(Input);
				520
				521	// Replace extension for object files with cubin because nvlink relies on
				522	// these particular file names.
				523	SmallString<256> Filename(ToolChain::getInputFilename(Input));
				524	llvm::sys::path::replace_extension(Filename, "cubin");
				525	return Filename.str();
				526	}
				527
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	528	void CudaToolChain::addClangTargetOptions(
				529	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	530	llvm::opt::ArgStringList &CC1Args,
				531	Action::OffloadKind DeviceOffloadingKind) const {
				532	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	533
				534	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				535	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	536	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				537	DeviceOffloadingKind == Action::OFK_Cuda) &&
				538	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				539
				540	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				541	CC1Args.push_back("-fcuda-is-device");
				542
				543	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				544	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				545	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				546
				547	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				548	options::OPT_fno_cuda_approx_transcendentals, false))
				549	CC1Args.push_back("-fcuda-approx-transcendentals");
Jonas Hahnfeld	5379c6d	2018-02-12 10:46:45 +0000	[diff] [blame]	550
				551	if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
				552	false))
				553	CC1Args.push_back("-fcuda-rdc");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	554	}
				555
Gheorghe-Teodor Bercea	20789a5	2017-09-25 21:56:32 +0000	[diff] [blame]	556	if (DriverArgs.hasArg(options::OPT_nocudalib))
				557	return;
				558
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	559	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				560
				561	if (LibDeviceFile.empty()) {
Gheorghe-Teodor Bercea	5a3608c	2017-09-26 15:36:20 +0000	[diff] [blame]	562	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
				563	DriverArgs.hasArg(options::OPT_S))
				564	return;
				565
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	566	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				567	return;
				568	}
				569
				570	CC1Args.push_back("-mlink-cuda-bitcode");
				571	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				572
Artem Belevich	4654dc8	2017-09-20 21:23:07 +0000	[diff] [blame]	573	if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
				574	// CUDA-9 uses new instructions that are only available in PTX6.0
				575	CC1Args.push_back("-target-feature");
				576	CC1Args.push_back("+ptx60");
				577	} else {
				578	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				579	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				580	// came with CUDA-7.0.
				581	CC1Args.push_back("-target-feature");
				582	CC1Args.push_back("+ptx42");
				583	}
Gheorghe-Teodor Bercea	0d5aa84	2018-03-13 23:19:52 +0000	[diff] [blame^]	584
				585	if (DeviceOffloadingKind == Action::OFK_OpenMP) {
				586	SmallVector<StringRef, 8> LibraryPaths;
				587	// Add path to lib and/or lib64 folders.
				588	SmallString<256> DefaultLibPath =
				589	llvm::sys::path::parent_path(getDriver().Dir);
				590	llvm::sys::path::append(DefaultLibPath,
				591	Twine("lib") + CLANG_LIBDIR_SUFFIX);
				592	LibraryPaths.emplace_back(DefaultLibPath.c_str());
				593
				594	// Add user defined library paths from LIBRARY_PATH.
				595	llvm::Optional<std::string> LibPath =
				596	llvm::sys::Process::GetEnv("LIBRARY_PATH");
				597	if (LibPath) {
				598	SmallVector<StringRef, 8> Frags;
				599	const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
				600	llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
				601	for (StringRef Path : Frags)
				602	LibraryPaths.emplace_back(Path.trim());
				603	}
				604
				605	std::string LibOmpTargetName =
				606	"libomptarget-nvptx-" + GpuArch.str() + ".bc";
				607	bool FoundBCLibrary = false;
				608	for (StringRef LibraryPath : LibraryPaths) {
				609	SmallString<128> LibOmpTargetFile(LibraryPath);
				610	llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
				611	if (llvm::sys::fs::exists(LibOmpTargetFile)) {
				612	CC1Args.push_back("-mlink-cuda-bitcode");
				613	CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
				614	FoundBCLibrary = true;
				615	break;
				616	}
				617	}
				618	if (!FoundBCLibrary)
				619	getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
				620	<< LibOmpTargetName;
				621	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	622	}
				623
				624	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				625	ArgStringList &CC1Args) const {
				626	// Check our CUDA version if we're going to include the CUDA headers.
				627	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				628	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				629	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				630	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				631	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				632	}
				633	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				634	}
				635
				636	llvm::opt::DerivedArgList *
				637	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				638	StringRef BoundArch,
				639	Action::OffloadKind DeviceOffloadKind) const {
				640	DerivedArgList *DAL =
				641	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				642	if (!DAL)
				643	DAL = new DerivedArgList(Args.getBaseArgs());
				644
				645	const OptTable &Opts = getDriver().getOpts();
				646
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	647	// For OpenMP device offloading, append derived arguments. Make sure
				648	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	649	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	650	if (DeviceOffloadKind == Action::OFK_OpenMP) {
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	651	for (Arg *A : Args) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	652	bool IsDuplicate = false;
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	653	for (Arg DALArg : DAL) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	654	if (A == DALArg) {
				655	IsDuplicate = true;
				656	break;
				657	}
				658	}
				659	if (!IsDuplicate)
				660	DAL->append(A);
				661	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	662
				663	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	664	if (Arch.empty())
				665	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
				666	CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	667
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	668	return DAL;
				669	}
				670
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	671	for (Arg *A : Args) {
				672	if (A->getOption().matches(options::OPT_Xarch__)) {
				673	// Skip this argument unless the architecture matches BoundArch
				674	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				675	continue;
				676
				677	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				678	unsigned Prev = Index;
				679	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				680
				681	// If the argument parsing failed or more than one argument was
				682	// consumed, the -Xarch_ argument's parameter tried to consume
				683	// extra arguments. Emit an error and ignore.
				684	//
				685	// We also want to disallow any options which would alter the
				686	// driver behavior; that isn't going to work in our model. We
				687	// use isDriverOption() as an approximation, although things
				688	// like -O4 are going to slip through.
				689	if (!XarchArg \|\| Index > Prev + 1) {
				690	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				691	<< A->getAsString(Args);
				692	continue;
				693	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				694	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				695	<< A->getAsString(Args);
				696	continue;
				697	}
				698	XarchArg->setBaseArg(A);
				699	A = XarchArg.release();
				700	DAL->AddSynthesizedArg(A);
				701	}
				702	DAL->append(A);
				703	}
				704
				705	if (!BoundArch.empty()) {
				706	DAL->eraseArg(options::OPT_march_EQ);
				707	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				708	}
				709	return DAL;
				710	}
				711
				712	Tool *CudaToolChain::buildAssembler() const {
				713	return new tools::NVPTX::Assembler(*this);
				714	}
				715
				716	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	717	if (OK == Action::OFK_OpenMP)
				718	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	719	return new tools::NVPTX::Linker(*this);
				720	}
				721
				722	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				723	HostTC.addClangWarningOptions(CC1Args);
				724	}
				725
				726	ToolChain::CXXStdlibType
				727	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				728	return HostTC.GetCXXStdlibType(Args);
				729	}
				730
				731	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				732	ArgStringList &CC1Args) const {
				733	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				734	}
				735
				736	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				737	ArgStringList &CC1Args) const {
				738	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				739	}
				740
				741	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				742	ArgStringList &CC1Args) const {
				743	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				744	}
				745
				746	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				747	// The CudaToolChain only supports sanitizers in the sense that it allows
				748	// sanitizer arguments on the command line if they are supported by the host
				749	// toolchain. The CudaToolChain will actually ignore any command line
				750	// arguments for any of these "supported" sanitizers. That means that no
				751	// sanitization of device code is actually supported at this time.
				752	//
				753	// This behavior is necessary because the host and device toolchains
				754	// invocations often share the command line, so the device toolchain must
				755	// tolerate flags meant only for the host toolchain.
				756	return HostTC.getSupportedSanitizers();
				757	}
				758
				759	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				760	const ArgList &Args) const {
				761	return HostTC.computeMSVCVersion(D, Args);
				762	}