Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: e513e818ebfbaff420f33f8700d4a87b3e733891 [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	11	#include "CommonArgs.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	12	#include "InputInfo.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
				14	#include "clang/Basic/VirtualFileSystem.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	15	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	16	#include "clang/Driver/Compilation.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	17	#include "clang/Driver/Distro.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	18	#include "clang/Driver/Driver.h"
				19	#include "clang/Driver/DriverDiagnostic.h"
				20	#include "clang/Driver/Options.h"
				21	#include "llvm/Option/ArgList.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	22	#include "llvm/Support/FileSystem.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	23	#include "llvm/Support/Path.h"
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	24	#include "llvm/Support/Program.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	25	#include <system_error>
				26
				27	using namespace clang::driver;
				28	using namespace clang::driver::toolchains;
				29	using namespace clang::driver::tools;
				30	using namespace clang;
				31	using namespace llvm::opt;
				32
				33	// Parses the contents of version.txt in an CUDA installation. It should
				34	// contain one line of the from e.g. "CUDA Version 7.5.2".
				35	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				36	if (!V.startswith("CUDA Version "))
				37	return CudaVersion::UNKNOWN;
				38	V = V.substr(strlen("CUDA Version "));
				39	int Major = -1, Minor = -1;
				40	auto First = V.split('.');
				41	auto Second = First.second.split('.');
				42	if (First.first.getAsInteger(10, Major) \|\|
				43	Second.first.getAsInteger(10, Minor))
				44	return CudaVersion::UNKNOWN;
				45
				46	if (Major == 7 && Minor == 0) {
				47	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				48	// CUDA 7 installs I've seen. But no harm in checking.
				49	return CudaVersion::CUDA_70;
				50	}
				51	if (Major == 7 && Minor == 5)
				52	return CudaVersion::CUDA_75;
				53	if (Major == 8 && Minor == 0)
				54	return CudaVersion::CUDA_80;
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	55	if (Major == 9 && Minor == 0)
				56	return CudaVersion::CUDA_90;
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	57	if (Major == 9 && Minor == 1)
				58	return CudaVersion::CUDA_91;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	59	return CudaVersion::UNKNOWN;
				60	}
				61
				62	CudaInstallationDetector::CudaInstallationDetector(
				63	const Driver &D, const llvm::Triple &HostTriple,
				64	const llvm::opt::ArgList &Args)
				65	: D(D) {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	66	struct Candidate {
				67	std::string Path;
				68	bool StrictChecking;
				69
				70	Candidate(std::string Path, bool StrictChecking = false)
				71	: Path(Path), StrictChecking(StrictChecking) {}
				72	};
				73	SmallVector<Candidate, 4> Candidates;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	74
				75	// In decreasing order so we prefer newer versions to older versions.
				76	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				77
				78	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	79	Candidates.emplace_back(
				80	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	81	} else if (HostTriple.isOSWindows()) {
				82	for (const char *Ver : Versions)
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	83	Candidates.emplace_back(
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	84	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				85	Ver);
				86	} else {
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	87	if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
				88	// Try to find ptxas binary. If the executable is located in a directory
				89	// called 'bin/', its parent directory might be a good guess for a valid
				90	// CUDA installation.
				91	// However, some distributions might installs 'ptxas' to /usr/bin. In that
				92	// case the candidate would be '/usr' which passes the following checks
				93	// because '/usr/include' exists as well. To avoid this case, we always
				94	// check for the directory potentially containing files for libdevice,
				95	// even if the user passes -nocudalib.
				96	if (llvm::ErrorOr<std::string> ptxas =
				97	llvm::sys::findProgramByName("ptxas")) {
				98	SmallString<256> ptxasAbsolutePath;
				99	llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
				100
				101	StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
				102	if (llvm::sys::path::filename(ptxasDir) == "bin")
				103	Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
				104	/StrictChecking=/true);
				105	}
				106	}
				107
				108	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	109	for (const char *Ver : Versions)
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	110	Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
Sylvestre Ledru	0cfcdc3	2017-11-29 15:03:28 +0000	[diff] [blame]	111
Ismail Donmez	64f99df	2017-11-29 15:18:02 +0000	[diff] [blame]	112	if (Distro(D.getVFS()).IsDebian())
Sylvestre Ledru	0cfcdc3	2017-11-29 15:03:28 +0000	[diff] [blame]	113	// Special case for Debian to have nvidia-cuda-toolkit work
				114	// out of the box. More info on http://bugs.debian.org/882505
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	115	Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	116	}
				117
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	118	bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
				119
				120	for (const auto &Candidate : Candidates) {
				121	InstallPath = Candidate.Path;
				122	if (InstallPath.empty() \|\| !D.getVFS().exists(InstallPath))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	123	continue;
				124
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	125	BinPath = InstallPath + "/bin";
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	126	IncludePath = InstallPath + "/include";
				127	LibDevicePath = InstallPath + "/nvvm/libdevice";
				128
				129	auto &FS = D.getVFS();
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	130	if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	131	continue;
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	132	bool CheckLibDevice = (!NoCudaLib \|\| Candidate.StrictChecking);
				133	if (CheckLibDevice && !FS.exists(LibDevicePath))
				134	continue;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	135
				136	// On Linux, we have both lib and lib64 directories, and we need to choose
				137	// based on our triple. On MacOS, we have only a lib directory.
				138	//
				139	// It's sufficient for our purposes to be flexible: If both lib and lib64
				140	// exist, we choose whichever one matches our triple. Otherwise, if only
				141	// lib exists, we use it.
				142	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				143	LibPath = InstallPath + "/lib64";
				144	else if (FS.exists(InstallPath + "/lib"))
				145	LibPath = InstallPath + "/lib";
				146	else
				147	continue;
				148
				149	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				150	FS.getBufferForFile(InstallPath + "/version.txt");
				151	if (!VersionFile) {
				152	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				153	// version.txt isn't present.
				154	Version = CudaVersion::CUDA_70;
				155	} else {
				156	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				157	}
				158
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	159	if (Version >= CudaVersion::CUDA_90) {
				160	// CUDA-9+ uses single libdevice file for all GPU variants.
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	161	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
				162	if (FS.exists(FilePath)) {
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	163	for (const char *GpuArchName :
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	164	{"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
Artem Belevich	fbc56a9	2018-01-30 00:00:12 +0000	[diff] [blame]	165	"sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
				166	const CudaArch GpuArch = StringToCudaArch(GpuArchName);
				167	if (Version >= MinVersionForCudaArch(GpuArch) &&
				168	Version <= MaxVersionForCudaArch(GpuArch))
				169	LibDeviceMap[GpuArchName] = FilePath;
				170	}
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	171	}
				172	} else {
				173	std::error_code EC;
				174	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				175	!EC && LI != LE; LI = LI.increment(EC)) {
				176	StringRef FilePath = LI->path();
				177	StringRef FileName = llvm::sys::path::filename(FilePath);
				178	// Process all bitcode filenames that look like
				179	// libdevice.compute_XX.YY.bc
				180	const StringRef LibDeviceName = "libdevice.";
				181	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				182	continue;
				183	StringRef GpuArch = FileName.slice(
				184	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				185	LibDeviceMap[GpuArch] = FilePath.str();
				186	// Insert map entries for specifc devices with this compute
				187	// capability. NVCC's choice of the libdevice library version is
				188	// rather peculiar and depends on the CUDA version.
				189	if (GpuArch == "compute_20") {
				190	LibDeviceMap["sm_20"] = FilePath;
				191	LibDeviceMap["sm_21"] = FilePath;
				192	LibDeviceMap["sm_32"] = FilePath;
				193	} else if (GpuArch == "compute_30") {
				194	LibDeviceMap["sm_30"] = FilePath;
				195	if (Version < CudaVersion::CUDA_80) {
				196	LibDeviceMap["sm_50"] = FilePath;
				197	LibDeviceMap["sm_52"] = FilePath;
				198	LibDeviceMap["sm_53"] = FilePath;
				199	}
				200	LibDeviceMap["sm_60"] = FilePath;
				201	LibDeviceMap["sm_61"] = FilePath;
				202	LibDeviceMap["sm_62"] = FilePath;
				203	} else if (GpuArch == "compute_35") {
				204	LibDeviceMap["sm_35"] = FilePath;
				205	LibDeviceMap["sm_37"] = FilePath;
				206	} else if (GpuArch == "compute_50") {
				207	if (Version >= CudaVersion::CUDA_80) {
				208	LibDeviceMap["sm_50"] = FilePath;
				209	LibDeviceMap["sm_52"] = FilePath;
				210	LibDeviceMap["sm_53"] = FilePath;
				211	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	212	}
				213	}
				214	}
				215
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	216	// Check that we have found at least one libdevice that we can link in if
				217	// -nocudalib hasn't been specified.
Jonas Hahnfeld	7f9c518	2018-01-31 08:26:51 +0000	[diff] [blame]	218	if (LibDeviceMap.empty() && !NoCudaLib)
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame]	219	continue;
				220
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	221	IsValid = true;
				222	break;
				223	}
				224	}
				225
				226	void CudaInstallationDetector::AddCudaIncludeArgs(
				227	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				228	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				229	// Add cuda_wrappers/* to our system include path. This lets us wrap
				230	// standard library headers.
				231	SmallString<128> P(D.ResourceDir);
				232	llvm::sys::path::append(P, "include");
				233	llvm::sys::path::append(P, "cuda_wrappers");
				234	CC1Args.push_back("-internal-isystem");
				235	CC1Args.push_back(DriverArgs.MakeArgString(P));
				236	}
				237
				238	if (DriverArgs.hasArg(options::OPT_nocudainc))
				239	return;
				240
				241	if (!isValid()) {
				242	D.Diag(diag::err_drv_no_cuda_installation);
				243	return;
				244	}
				245
				246	CC1Args.push_back("-internal-isystem");
				247	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				248	CC1Args.push_back("-include");
				249	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				250	}
				251
				252	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				253	CudaArch Arch) const {
				254	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	255	ArchsWithBadVersion.count(Arch) > 0)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	256	return;
				257
Justin Lebar	066494d	2017-10-25 21:32:06 +0000	[diff] [blame]	258	auto MinVersion = MinVersionForCudaArch(Arch);
				259	auto MaxVersion = MaxVersionForCudaArch(Arch);
				260	if (Version < MinVersion \|\| Version > MaxVersion) {
				261	ArchsWithBadVersion.insert(Arch);
				262	D.Diag(diag::err_drv_cuda_version_unsupported)
				263	<< CudaArchToString(Arch) << CudaVersionToString(MinVersion)
				264	<< CudaVersionToString(MaxVersion) << InstallPath
				265	<< CudaVersionToString(Version);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	266	}
				267	}
				268
				269	void CudaInstallationDetector::print(raw_ostream &OS) const {
				270	if (isValid())
				271	OS << "Found CUDA installation: " << InstallPath << ", version "
				272	<< CudaVersionToString(Version) << "\n";
				273	}
				274
				275	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				276	const InputInfo &Output,
				277	const InputInfoList &Inputs,
				278	const ArgList &Args,
				279	const char *LinkingOutput) const {
				280	const auto &TC =
				281	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				282	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				283
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	284	StringRef GPUArchName;
				285	// If this is an OpenMP action we need to extract the device architecture
				286	// from the -march=arch option. This option may come from -Xopenmp-target
				287	// flag or the default value.
				288	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				289	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				290	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				291	} else
				292	GPUArchName = JA.getOffloadingArch();
				293
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	294	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	295	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	296	assert(gpu_arch != CudaArch::UNKNOWN &&
				297	"Device action expected to have an architecture.");
				298
				299	// Check that our installation's ptxas supports gpu_arch.
				300	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				301	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				302	}
				303
				304	ArgStringList CmdArgs;
				305	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				306	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				307	options::OPT_no_cuda_noopt_device_debug, false)) {
				308	// ptxas does not accept -g option if optimization is enabled, so
				309	// we ignore the compiler's -O* options if we want debug info.
				310	CmdArgs.push_back("-g");
				311	CmdArgs.push_back("--dont-merge-basicblocks");
				312	CmdArgs.push_back("--return-at-end");
				313	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				314	// Map the -O we received to -O{0,1,2,3}.
				315	//
				316	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				317	// default, so it may correspond more closely to the spirit of clang -O2.
				318
				319	// -O3 seems like the least-bad option when -Osomething is specified to
				320	// clang but it isn't handled below.
				321	StringRef OOpt = "3";
				322	if (A->getOption().matches(options::OPT_O4) \|\|
				323	A->getOption().matches(options::OPT_Ofast))
				324	OOpt = "3";
				325	else if (A->getOption().matches(options::OPT_O0))
				326	OOpt = "0";
				327	else if (A->getOption().matches(options::OPT_O)) {
				328	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				329	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				330	.Case("1", "1")
				331	.Case("2", "2")
				332	.Case("3", "3")
				333	.Case("s", "2")
				334	.Case("z", "2")
				335	.Default("2");
				336	}
				337	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				338	} else {
				339	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				340	// to no optimizations, but ptxas's default is -O3.
				341	CmdArgs.push_back("-O0");
				342	}
				343
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	344	// Pass -v to ptxas if it was passed to the driver.
				345	if (Args.hasArg(options::OPT_v))
				346	CmdArgs.push_back("-v");
				347
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	348	CmdArgs.push_back("--gpu-name");
				349	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				350	CmdArgs.push_back("--output-file");
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	351	CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	352	for (const auto& II : Inputs)
				353	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				354
				355	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				356	CmdArgs.push_back(Args.MakeArgString(A));
				357
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	358	// In OpenMP we need to generate relocatable code.
Gheorghe-Teodor Bercea	0846582	2017-08-09 15:27:39 +0000	[diff] [blame]	359	if (JA.isOffloading(Action::OFK_OpenMP) &&
				360	Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				361	options::OPT_fnoopenmp_relocatable_target,
				362	/Default=/ true))
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	363	CmdArgs.push_back("-c");
				364
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	365	const char *Exec;
				366	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				367	Exec = A->getValue();
				368	else
				369	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				370	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				371	}
				372
				373	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				374	// at the Inputs' Actions in order to figure out which GPU architecture they
				375	// correspond to.
				376	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				377	const InputInfo &Output,
				378	const InputInfoList &Inputs,
				379	const ArgList &Args,
				380	const char *LinkingOutput) const {
				381	const auto &TC =
				382	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				383	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				384
				385	ArgStringList CmdArgs;
				386	CmdArgs.push_back("--cuda");
				387	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				388	CmdArgs.push_back(Args.MakeArgString("--create"));
				389	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				390
				391	for (const auto& II : Inputs) {
				392	auto *A = II.getAction();
				393	assert(A->getInputs().size() == 1 &&
				394	"Device offload action is expected to have a single input");
				395	const char *gpu_arch_str = A->getOffloadingArch();
				396	assert(gpu_arch_str &&
				397	"Device action expected to have associated a GPU architecture!");
				398	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				399
				400	// We need to pass an Arch of the form "sm_XX" for cubin files and
				401	// "compute_XX" for ptx.
				402	const char *Arch =
				403	(II.getType() == types::TY_PP_Asm)
				404	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				405	: gpu_arch_str;
				406	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				407	Arch + ",file=" + II.getFilename()));
				408	}
				409
				410	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				411	CmdArgs.push_back(Args.MakeArgString(A));
				412
				413	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				414	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				415	}
				416
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	417	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				418	const InputInfo &Output,
				419	const InputInfoList &Inputs,
				420	const ArgList &Args,
				421	const char *LinkingOutput) const {
				422	const auto &TC =
				423	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				424	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				425
				426	ArgStringList CmdArgs;
				427
				428	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				429	// host binary by the host linker.
				430	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				431	"CUDA toolchain not expected for an OpenMP host device.");
				432
				433	if (Output.isFilename()) {
				434	CmdArgs.push_back("-o");
				435	CmdArgs.push_back(Output.getFilename());
				436	} else
				437	assert(Output.isNothing() && "Invalid output.");
				438	if (Args.hasArg(options::OPT_g_Flag))
				439	CmdArgs.push_back("-g");
				440
				441	if (Args.hasArg(options::OPT_v))
				442	CmdArgs.push_back("-v");
				443
				444	StringRef GPUArch =
				445	Args.getLastArgValue(options::OPT_march_EQ);
				446	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				447
				448	CmdArgs.push_back("-arch");
				449	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				450
				451	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				452	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				453
				454	// Add paths for the default clang library path.
				455	SmallString<256> DefaultLibPath =
				456	llvm::sys::path::parent_path(TC.getDriver().Dir);
				457	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				458	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				459
				460	// Add linking against library implementing OpenMP calls on NVPTX target.
				461	CmdArgs.push_back("-lomptarget-nvptx");
				462
				463	for (const auto &II : Inputs) {
				464	if (II.getType() == types::TY_LLVM_IR \|\|
				465	II.getType() == types::TY_LTO_IR \|\|
				466	II.getType() == types::TY_LTO_BC \|\|
				467	II.getType() == types::TY_LLVM_BC) {
				468	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				469	<< getToolChain().getTripleString();
				470	continue;
				471	}
				472
				473	// Currently, we only pass the input files to the linker, we do not pass
				474	// any libraries that may be valid only for the host.
				475	if (!II.isFilename())
				476	continue;
				477
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	478	const char *CubinF = C.addTempFile(
				479	C.getArgs().MakeArgString(getToolChain().getInputFilename(II)));
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	480
				481	CmdArgs.push_back(CubinF);
				482	}
				483
				484	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				485
				486	const char *Exec =
				487	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				488	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				489	}
				490
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	491	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				492	/// which isn't properly a linker but nonetheless performs the step of stitching
				493	/// together object files from the assembler into a single blob.
				494
				495	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	496	const ToolChain &HostTC, const ArgList &Args,
				497	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	498	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	499	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	500	if (CudaInstallation.isValid())
				501	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	502	// Lookup binaries into the driver directory, this is used to
				503	// discover the clang-offload-bundler executable.
				504	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	505	}
				506
Jonas Hahnfeld	7c78cc5	2017-11-21 14:44:45 +0000	[diff] [blame]	507	std::string CudaToolChain::getInputFilename(const InputInfo &Input) const {
				508	// Only object files are changed, for example assembly files keep their .s
				509	// extensions. CUDA also continues to use .o as they don't use nvlink but
				510	// fatbinary.
				511	if (!(OK == Action::OFK_OpenMP && Input.getType() == types::TY_Object))
				512	return ToolChain::getInputFilename(Input);
				513
				514	// Replace extension for object files with cubin because nvlink relies on
				515	// these particular file names.
				516	SmallString<256> Filename(ToolChain::getInputFilename(Input));
				517	llvm::sys::path::replace_extension(Filename, "cubin");
				518	return Filename.str();
				519	}
				520
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	521	void CudaToolChain::addClangTargetOptions(
				522	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	523	llvm::opt::ArgStringList &CC1Args,
				524	Action::OffloadKind DeviceOffloadingKind) const {
				525	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	526
				527	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				528	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	529	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				530	DeviceOffloadingKind == Action::OFK_Cuda) &&
				531	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				532
				533	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				534	CC1Args.push_back("-fcuda-is-device");
				535
				536	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				537	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				538	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				539
				540	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				541	options::OPT_fno_cuda_approx_transcendentals, false))
				542	CC1Args.push_back("-fcuda-approx-transcendentals");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	543	}
				544
Gheorghe-Teodor Bercea	20789a5	2017-09-25 21:56:32 +0000	[diff] [blame]	545	if (DriverArgs.hasArg(options::OPT_nocudalib))
				546	return;
				547
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	548	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				549
				550	if (LibDeviceFile.empty()) {
Gheorghe-Teodor Bercea	5a3608c	2017-09-26 15:36:20 +0000	[diff] [blame]	551	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
				552	DriverArgs.hasArg(options::OPT_S))
				553	return;
				554
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	555	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				556	return;
				557	}
				558
				559	CC1Args.push_back("-mlink-cuda-bitcode");
				560	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				561
Artem Belevich	4654dc8	2017-09-20 21:23:07 +0000	[diff] [blame]	562	if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
				563	// CUDA-9 uses new instructions that are only available in PTX6.0
				564	CC1Args.push_back("-target-feature");
				565	CC1Args.push_back("+ptx60");
				566	} else {
				567	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				568	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				569	// came with CUDA-7.0.
				570	CC1Args.push_back("-target-feature");
				571	CC1Args.push_back("+ptx42");
				572	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	573	}
				574
				575	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				576	ArgStringList &CC1Args) const {
				577	// Check our CUDA version if we're going to include the CUDA headers.
				578	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				579	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				580	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				581	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				582	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				583	}
				584	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				585	}
				586
				587	llvm::opt::DerivedArgList *
				588	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				589	StringRef BoundArch,
				590	Action::OffloadKind DeviceOffloadKind) const {
				591	DerivedArgList *DAL =
				592	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				593	if (!DAL)
				594	DAL = new DerivedArgList(Args.getBaseArgs());
				595
				596	const OptTable &Opts = getDriver().getOpts();
				597
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	598	// For OpenMP device offloading, append derived arguments. Make sure
				599	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	600	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	601	if (DeviceOffloadKind == Action::OFK_OpenMP) {
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	602	for (Arg *A : Args) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	603	bool IsDuplicate = false;
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	604	for (Arg DALArg : DAL) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	605	if (A == DALArg) {
				606	IsDuplicate = true;
				607	break;
				608	}
				609	}
				610	if (!IsDuplicate)
				611	DAL->append(A);
				612	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	613
				614	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame]	615	if (Arch.empty())
				616	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
				617	CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	618
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	619	return DAL;
				620	}
				621
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	622	for (Arg *A : Args) {
				623	if (A->getOption().matches(options::OPT_Xarch__)) {
				624	// Skip this argument unless the architecture matches BoundArch
				625	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				626	continue;
				627
				628	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				629	unsigned Prev = Index;
				630	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				631
				632	// If the argument parsing failed or more than one argument was
				633	// consumed, the -Xarch_ argument's parameter tried to consume
				634	// extra arguments. Emit an error and ignore.
				635	//
				636	// We also want to disallow any options which would alter the
				637	// driver behavior; that isn't going to work in our model. We
				638	// use isDriverOption() as an approximation, although things
				639	// like -O4 are going to slip through.
				640	if (!XarchArg \|\| Index > Prev + 1) {
				641	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				642	<< A->getAsString(Args);
				643	continue;
				644	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				645	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				646	<< A->getAsString(Args);
				647	continue;
				648	}
				649	XarchArg->setBaseArg(A);
				650	A = XarchArg.release();
				651	DAL->AddSynthesizedArg(A);
				652	}
				653	DAL->append(A);
				654	}
				655
				656	if (!BoundArch.empty()) {
				657	DAL->eraseArg(options::OPT_march_EQ);
				658	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				659	}
				660	return DAL;
				661	}
				662
				663	Tool *CudaToolChain::buildAssembler() const {
				664	return new tools::NVPTX::Assembler(*this);
				665	}
				666
				667	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	668	if (OK == Action::OFK_OpenMP)
				669	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	670	return new tools::NVPTX::Linker(*this);
				671	}
				672
				673	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				674	HostTC.addClangWarningOptions(CC1Args);
				675	}
				676
				677	ToolChain::CXXStdlibType
				678	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				679	return HostTC.GetCXXStdlibType(Args);
				680	}
				681
				682	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				683	ArgStringList &CC1Args) const {
				684	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				685	}
				686
				687	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				688	ArgStringList &CC1Args) const {
				689	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				690	}
				691
				692	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				693	ArgStringList &CC1Args) const {
				694	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				695	}
				696
				697	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				698	// The CudaToolChain only supports sanitizers in the sense that it allows
				699	// sanitizer arguments on the command line if they are supported by the host
				700	// toolchain. The CudaToolChain will actually ignore any command line
				701	// arguments for any of these "supported" sanitizers. That means that no
				702	// sanitization of device code is actually supported at this time.
				703	//
				704	// This behavior is necessary because the host and device toolchains
				705	// invocations often share the command line, so the device toolchain must
				706	// tolerate flags meant only for the host toolchain.
				707	return HostTC.getSupportedSanitizers();
				708	}
				709
				710	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				711	const ArgList &Args) const {
				712	return HostTC.computeMSVCVersion(D, Args);
				713	}