Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: 4f740fc9525f6e6501dcdb151f86af8f9c8880ea [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
				11	#include "InputInfo.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	12	#include "CommonArgs.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	14	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	15	#include "clang/Basic/VirtualFileSystem.h"
				16	#include "clang/Driver/Compilation.h"
				17	#include "clang/Driver/Driver.h"
				18	#include "clang/Driver/DriverDiagnostic.h"
				19	#include "clang/Driver/Options.h"
				20	#include "llvm/Option/ArgList.h"
				21	#include "llvm/Support/Path.h"
				22	#include <system_error>
				23
				24	using namespace clang::driver;
				25	using namespace clang::driver::toolchains;
				26	using namespace clang::driver::tools;
				27	using namespace clang;
				28	using namespace llvm::opt;
				29
				30	// Parses the contents of version.txt in an CUDA installation. It should
				31	// contain one line of the from e.g. "CUDA Version 7.5.2".
				32	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				33	if (!V.startswith("CUDA Version "))
				34	return CudaVersion::UNKNOWN;
				35	V = V.substr(strlen("CUDA Version "));
				36	int Major = -1, Minor = -1;
				37	auto First = V.split('.');
				38	auto Second = First.second.split('.');
				39	if (First.first.getAsInteger(10, Major) \|\|
				40	Second.first.getAsInteger(10, Minor))
				41	return CudaVersion::UNKNOWN;
				42
				43	if (Major == 7 && Minor == 0) {
				44	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				45	// CUDA 7 installs I've seen. But no harm in checking.
				46	return CudaVersion::CUDA_70;
				47	}
				48	if (Major == 7 && Minor == 5)
				49	return CudaVersion::CUDA_75;
				50	if (Major == 8 && Minor == 0)
				51	return CudaVersion::CUDA_80;
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	52	if (Major == 9 && Minor == 0)
				53	return CudaVersion::CUDA_90;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	54	return CudaVersion::UNKNOWN;
				55	}
				56
				57	CudaInstallationDetector::CudaInstallationDetector(
				58	const Driver &D, const llvm::Triple &HostTriple,
				59	const llvm::opt::ArgList &Args)
				60	: D(D) {
				61	SmallVector<std::string, 4> CudaPathCandidates;
				62
				63	// In decreasing order so we prefer newer versions to older versions.
				64	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				65
				66	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
				67	CudaPathCandidates.push_back(
				68	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
				69	} else if (HostTriple.isOSWindows()) {
				70	for (const char *Ver : Versions)
				71	CudaPathCandidates.push_back(
				72	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				73	Ver);
				74	} else {
				75	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
				76	for (const char *Ver : Versions)
				77	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
				78	}
				79
				80	for (const auto &CudaPath : CudaPathCandidates) {
				81	if (CudaPath.empty() \|\| !D.getVFS().exists(CudaPath))
				82	continue;
				83
				84	InstallPath = CudaPath;
				85	BinPath = CudaPath + "/bin";
				86	IncludePath = InstallPath + "/include";
				87	LibDevicePath = InstallPath + "/nvvm/libdevice";
				88
				89	auto &FS = D.getVFS();
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	90	if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	91	continue;
				92
				93	// On Linux, we have both lib and lib64 directories, and we need to choose
				94	// based on our triple. On MacOS, we have only a lib directory.
				95	//
				96	// It's sufficient for our purposes to be flexible: If both lib and lib64
				97	// exist, we choose whichever one matches our triple. Otherwise, if only
				98	// lib exists, we use it.
				99	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				100	LibPath = InstallPath + "/lib64";
				101	else if (FS.exists(InstallPath + "/lib"))
				102	LibPath = InstallPath + "/lib";
				103	else
				104	continue;
				105
				106	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				107	FS.getBufferForFile(InstallPath + "/version.txt");
				108	if (!VersionFile) {
				109	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				110	// version.txt isn't present.
				111	Version = CudaVersion::CUDA_70;
				112	} else {
				113	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				114	}
				115
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	116	if (Version == CudaVersion::CUDA_90) {
				117	// CUDA-9 uses single libdevice file for all GPU variants.
				118	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
				119	if (FS.exists(FilePath)) {
				120	for (const char *GpuArch :
				121	{"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
				122	"sm_60", "sm_61", "sm_62", "sm_70"})
				123	LibDeviceMap[GpuArch] = FilePath;
				124	}
				125	} else {
				126	std::error_code EC;
				127	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				128	!EC && LI != LE; LI = LI.increment(EC)) {
				129	StringRef FilePath = LI->path();
				130	StringRef FileName = llvm::sys::path::filename(FilePath);
				131	// Process all bitcode filenames that look like
				132	// libdevice.compute_XX.YY.bc
				133	const StringRef LibDeviceName = "libdevice.";
				134	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				135	continue;
				136	StringRef GpuArch = FileName.slice(
				137	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				138	LibDeviceMap[GpuArch] = FilePath.str();
				139	// Insert map entries for specifc devices with this compute
				140	// capability. NVCC's choice of the libdevice library version is
				141	// rather peculiar and depends on the CUDA version.
				142	if (GpuArch == "compute_20") {
				143	LibDeviceMap["sm_20"] = FilePath;
				144	LibDeviceMap["sm_21"] = FilePath;
				145	LibDeviceMap["sm_32"] = FilePath;
				146	} else if (GpuArch == "compute_30") {
				147	LibDeviceMap["sm_30"] = FilePath;
				148	if (Version < CudaVersion::CUDA_80) {
				149	LibDeviceMap["sm_50"] = FilePath;
				150	LibDeviceMap["sm_52"] = FilePath;
				151	LibDeviceMap["sm_53"] = FilePath;
				152	}
				153	LibDeviceMap["sm_60"] = FilePath;
				154	LibDeviceMap["sm_61"] = FilePath;
				155	LibDeviceMap["sm_62"] = FilePath;
				156	} else if (GpuArch == "compute_35") {
				157	LibDeviceMap["sm_35"] = FilePath;
				158	LibDeviceMap["sm_37"] = FilePath;
				159	} else if (GpuArch == "compute_50") {
				160	if (Version >= CudaVersion::CUDA_80) {
				161	LibDeviceMap["sm_50"] = FilePath;
				162	LibDeviceMap["sm_52"] = FilePath;
				163	LibDeviceMap["sm_53"] = FilePath;
				164	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	165	}
				166	}
				167	}
				168
Jonas Hahnfeld	e2c342f	2017-10-16 13:31:30 +0000	[diff] [blame]	169	// Check that we have found at least one libdevice that we can link in if
				170	// -nocudalib hasn't been specified.
				171	if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame]	172	continue;
				173
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	174	IsValid = true;
				175	break;
				176	}
				177	}
				178
				179	void CudaInstallationDetector::AddCudaIncludeArgs(
				180	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				181	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				182	// Add cuda_wrappers/* to our system include path. This lets us wrap
				183	// standard library headers.
				184	SmallString<128> P(D.ResourceDir);
				185	llvm::sys::path::append(P, "include");
				186	llvm::sys::path::append(P, "cuda_wrappers");
				187	CC1Args.push_back("-internal-isystem");
				188	CC1Args.push_back(DriverArgs.MakeArgString(P));
				189	}
				190
				191	if (DriverArgs.hasArg(options::OPT_nocudainc))
				192	return;
				193
				194	if (!isValid()) {
				195	D.Diag(diag::err_drv_no_cuda_installation);
				196	return;
				197	}
				198
				199	CC1Args.push_back("-internal-isystem");
				200	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				201	CC1Args.push_back("-include");
				202	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				203	}
				204
				205	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				206	CudaArch Arch) const {
				207	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
				208	ArchsWithVersionTooLowErrors.count(Arch) > 0)
				209	return;
				210
				211	auto RequiredVersion = MinVersionForCudaArch(Arch);
				212	if (Version < RequiredVersion) {
				213	ArchsWithVersionTooLowErrors.insert(Arch);
				214	D.Diag(diag::err_drv_cuda_version_too_low)
				215	<< InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
				216	<< CudaVersionToString(RequiredVersion);
				217	}
				218	}
				219
				220	void CudaInstallationDetector::print(raw_ostream &OS) const {
				221	if (isValid())
				222	OS << "Found CUDA installation: " << InstallPath << ", version "
				223	<< CudaVersionToString(Version) << "\n";
				224	}
				225
				226	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				227	const InputInfo &Output,
				228	const InputInfoList &Inputs,
				229	const ArgList &Args,
				230	const char *LinkingOutput) const {
				231	const auto &TC =
				232	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				233	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				234
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	235	StringRef GPUArchName;
				236	// If this is an OpenMP action we need to extract the device architecture
				237	// from the -march=arch option. This option may come from -Xopenmp-target
				238	// flag or the default value.
				239	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				240	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				241	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				242	} else
				243	GPUArchName = JA.getOffloadingArch();
				244
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	245	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	246	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	247	assert(gpu_arch != CudaArch::UNKNOWN &&
				248	"Device action expected to have an architecture.");
				249
				250	// Check that our installation's ptxas supports gpu_arch.
				251	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				252	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				253	}
				254
				255	ArgStringList CmdArgs;
				256	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				257	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				258	options::OPT_no_cuda_noopt_device_debug, false)) {
				259	// ptxas does not accept -g option if optimization is enabled, so
				260	// we ignore the compiler's -O* options if we want debug info.
				261	CmdArgs.push_back("-g");
				262	CmdArgs.push_back("--dont-merge-basicblocks");
				263	CmdArgs.push_back("--return-at-end");
				264	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				265	// Map the -O we received to -O{0,1,2,3}.
				266	//
				267	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				268	// default, so it may correspond more closely to the spirit of clang -O2.
				269
				270	// -O3 seems like the least-bad option when -Osomething is specified to
				271	// clang but it isn't handled below.
				272	StringRef OOpt = "3";
				273	if (A->getOption().matches(options::OPT_O4) \|\|
				274	A->getOption().matches(options::OPT_Ofast))
				275	OOpt = "3";
				276	else if (A->getOption().matches(options::OPT_O0))
				277	OOpt = "0";
				278	else if (A->getOption().matches(options::OPT_O)) {
				279	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				280	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				281	.Case("1", "1")
				282	.Case("2", "2")
				283	.Case("3", "3")
				284	.Case("s", "2")
				285	.Case("z", "2")
				286	.Default("2");
				287	}
				288	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				289	} else {
				290	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				291	// to no optimizations, but ptxas's default is -O3.
				292	CmdArgs.push_back("-O0");
				293	}
				294
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	295	// Pass -v to ptxas if it was passed to the driver.
				296	if (Args.hasArg(options::OPT_v))
				297	CmdArgs.push_back("-v");
				298
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	299	CmdArgs.push_back("--gpu-name");
				300	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				301	CmdArgs.push_back("--output-file");
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	302	SmallString<256> OutputFileName(Output.getFilename());
				303	if (JA.isOffloading(Action::OFK_OpenMP))
				304	llvm::sys::path::replace_extension(OutputFileName, "cubin");
				305	CmdArgs.push_back(Args.MakeArgString(OutputFileName));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	306	for (const auto& II : Inputs)
				307	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				308
				309	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				310	CmdArgs.push_back(Args.MakeArgString(A));
				311
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	312	// In OpenMP we need to generate relocatable code.
Gheorghe-Teodor Bercea	0846582	2017-08-09 15:27:39 +0000	[diff] [blame]	313	if (JA.isOffloading(Action::OFK_OpenMP) &&
				314	Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				315	options::OPT_fnoopenmp_relocatable_target,
				316	/Default=/ true))
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	317	CmdArgs.push_back("-c");
				318
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	319	const char *Exec;
				320	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				321	Exec = A->getValue();
				322	else
				323	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				324	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				325	}
				326
				327	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				328	// at the Inputs' Actions in order to figure out which GPU architecture they
				329	// correspond to.
				330	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				331	const InputInfo &Output,
				332	const InputInfoList &Inputs,
				333	const ArgList &Args,
				334	const char *LinkingOutput) const {
				335	const auto &TC =
				336	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				337	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				338
				339	ArgStringList CmdArgs;
				340	CmdArgs.push_back("--cuda");
				341	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				342	CmdArgs.push_back(Args.MakeArgString("--create"));
				343	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				344
				345	for (const auto& II : Inputs) {
				346	auto *A = II.getAction();
				347	assert(A->getInputs().size() == 1 &&
				348	"Device offload action is expected to have a single input");
				349	const char *gpu_arch_str = A->getOffloadingArch();
				350	assert(gpu_arch_str &&
				351	"Device action expected to have associated a GPU architecture!");
				352	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				353
				354	// We need to pass an Arch of the form "sm_XX" for cubin files and
				355	// "compute_XX" for ptx.
				356	const char *Arch =
				357	(II.getType() == types::TY_PP_Asm)
				358	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				359	: gpu_arch_str;
				360	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				361	Arch + ",file=" + II.getFilename()));
				362	}
				363
				364	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				365	CmdArgs.push_back(Args.MakeArgString(A));
				366
				367	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				368	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				369	}
				370
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	371	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				372	const InputInfo &Output,
				373	const InputInfoList &Inputs,
				374	const ArgList &Args,
				375	const char *LinkingOutput) const {
				376	const auto &TC =
				377	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				378	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				379
				380	ArgStringList CmdArgs;
				381
				382	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				383	// host binary by the host linker.
				384	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				385	"CUDA toolchain not expected for an OpenMP host device.");
				386
				387	if (Output.isFilename()) {
				388	CmdArgs.push_back("-o");
				389	CmdArgs.push_back(Output.getFilename());
				390	} else
				391	assert(Output.isNothing() && "Invalid output.");
				392	if (Args.hasArg(options::OPT_g_Flag))
				393	CmdArgs.push_back("-g");
				394
				395	if (Args.hasArg(options::OPT_v))
				396	CmdArgs.push_back("-v");
				397
				398	StringRef GPUArch =
				399	Args.getLastArgValue(options::OPT_march_EQ);
				400	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				401
				402	CmdArgs.push_back("-arch");
				403	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				404
				405	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				406	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				407
				408	// Add paths for the default clang library path.
				409	SmallString<256> DefaultLibPath =
				410	llvm::sys::path::parent_path(TC.getDriver().Dir);
				411	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				412	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				413
				414	// Add linking against library implementing OpenMP calls on NVPTX target.
				415	CmdArgs.push_back("-lomptarget-nvptx");
				416
				417	for (const auto &II : Inputs) {
				418	if (II.getType() == types::TY_LLVM_IR \|\|
				419	II.getType() == types::TY_LTO_IR \|\|
				420	II.getType() == types::TY_LTO_BC \|\|
				421	II.getType() == types::TY_LLVM_BC) {
				422	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				423	<< getToolChain().getTripleString();
				424	continue;
				425	}
				426
				427	// Currently, we only pass the input files to the linker, we do not pass
				428	// any libraries that may be valid only for the host.
				429	if (!II.isFilename())
				430	continue;
				431
Gheorghe-Teodor Bercea	5636f4b	2017-09-25 21:25:38 +0000	[diff] [blame]	432	SmallString<256> Name(II.getFilename());
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	433	llvm::sys::path::replace_extension(Name, "cubin");
				434
				435	const char *CubinF =
				436	C.addTempFile(C.getArgs().MakeArgString(Name));
				437
				438	CmdArgs.push_back(CubinF);
				439	}
				440
				441	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				442
				443	const char *Exec =
				444	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				445	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				446	}
				447
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	448	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				449	/// which isn't properly a linker but nonetheless performs the step of stitching
				450	/// together object files from the assembler into a single blob.
				451
				452	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	453	const ToolChain &HostTC, const ArgList &Args,
				454	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	455	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	456	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	457	if (CudaInstallation.isValid())
				458	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	459	// Lookup binaries into the driver directory, this is used to
				460	// discover the clang-offload-bundler executable.
				461	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	462	}
				463
				464	void CudaToolChain::addClangTargetOptions(
				465	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	466	llvm::opt::ArgStringList &CC1Args,
				467	Action::OffloadKind DeviceOffloadingKind) const {
				468	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	469
				470	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				471	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	472	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				473	DeviceOffloadingKind == Action::OFK_Cuda) &&
				474	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				475
				476	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				477	CC1Args.push_back("-fcuda-is-device");
				478
				479	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				480	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				481	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				482
				483	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				484	options::OPT_fno_cuda_approx_transcendentals, false))
				485	CC1Args.push_back("-fcuda-approx-transcendentals");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	486	}
				487
Gheorghe-Teodor Bercea	20789a5	2017-09-25 21:56:32 +0000	[diff] [blame]	488	if (DriverArgs.hasArg(options::OPT_nocudalib))
				489	return;
				490
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	491	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				492
				493	if (LibDeviceFile.empty()) {
Gheorghe-Teodor Bercea	5a3608c	2017-09-26 15:36:20 +0000	[diff] [blame]	494	if (DeviceOffloadingKind == Action::OFK_OpenMP &&
				495	DriverArgs.hasArg(options::OPT_S))
				496	return;
				497
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	498	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				499	return;
				500	}
				501
				502	CC1Args.push_back("-mlink-cuda-bitcode");
				503	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				504
Artem Belevich	4654dc8	2017-09-20 21:23:07 +0000	[diff] [blame]	505	if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
				506	// CUDA-9 uses new instructions that are only available in PTX6.0
				507	CC1Args.push_back("-target-feature");
				508	CC1Args.push_back("+ptx60");
				509	} else {
				510	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				511	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				512	// came with CUDA-7.0.
				513	CC1Args.push_back("-target-feature");
				514	CC1Args.push_back("+ptx42");
				515	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	516	}
				517
				518	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				519	ArgStringList &CC1Args) const {
				520	// Check our CUDA version if we're going to include the CUDA headers.
				521	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				522	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				523	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				524	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				525	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				526	}
				527	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				528	}
				529
				530	llvm::opt::DerivedArgList *
				531	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				532	StringRef BoundArch,
				533	Action::OffloadKind DeviceOffloadKind) const {
				534	DerivedArgList *DAL =
				535	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				536	if (!DAL)
				537	DAL = new DerivedArgList(Args.getBaseArgs());
				538
				539	const OptTable &Opts = getDriver().getOpts();
				540
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	541	// For OpenMP device offloading, append derived arguments. Make sure
				542	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	543	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	544	if (DeviceOffloadKind == Action::OFK_OpenMP) {
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame^]	545	for (Arg *A : Args) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	546	bool IsDuplicate = false;
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame^]	547	for (Arg DALArg : DAL) {
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	548	if (A == DALArg) {
				549	IsDuplicate = true;
				550	break;
				551	}
				552	}
				553	if (!IsDuplicate)
				554	DAL->append(A);
				555	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	556
				557	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Jonas Hahnfeld	30b4418	2017-10-17 13:37:36 +0000	[diff] [blame^]	558	if (Arch.empty())
				559	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
				560	CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	561
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	562	return DAL;
				563	}
				564
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	565	for (Arg *A : Args) {
				566	if (A->getOption().matches(options::OPT_Xarch__)) {
				567	// Skip this argument unless the architecture matches BoundArch
				568	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				569	continue;
				570
				571	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				572	unsigned Prev = Index;
				573	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				574
				575	// If the argument parsing failed or more than one argument was
				576	// consumed, the -Xarch_ argument's parameter tried to consume
				577	// extra arguments. Emit an error and ignore.
				578	//
				579	// We also want to disallow any options which would alter the
				580	// driver behavior; that isn't going to work in our model. We
				581	// use isDriverOption() as an approximation, although things
				582	// like -O4 are going to slip through.
				583	if (!XarchArg \|\| Index > Prev + 1) {
				584	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				585	<< A->getAsString(Args);
				586	continue;
				587	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				588	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				589	<< A->getAsString(Args);
				590	continue;
				591	}
				592	XarchArg->setBaseArg(A);
				593	A = XarchArg.release();
				594	DAL->AddSynthesizedArg(A);
				595	}
				596	DAL->append(A);
				597	}
				598
				599	if (!BoundArch.empty()) {
				600	DAL->eraseArg(options::OPT_march_EQ);
				601	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				602	}
				603	return DAL;
				604	}
				605
				606	Tool *CudaToolChain::buildAssembler() const {
				607	return new tools::NVPTX::Assembler(*this);
				608	}
				609
				610	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	611	if (OK == Action::OFK_OpenMP)
				612	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	613	return new tools::NVPTX::Linker(*this);
				614	}
				615
				616	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				617	HostTC.addClangWarningOptions(CC1Args);
				618	}
				619
				620	ToolChain::CXXStdlibType
				621	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				622	return HostTC.GetCXXStdlibType(Args);
				623	}
				624
				625	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				626	ArgStringList &CC1Args) const {
				627	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				628	}
				629
				630	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				631	ArgStringList &CC1Args) const {
				632	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				633	}
				634
				635	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				636	ArgStringList &CC1Args) const {
				637	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				638	}
				639
				640	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				641	// The CudaToolChain only supports sanitizers in the sense that it allows
				642	// sanitizer arguments on the command line if they are supported by the host
				643	// toolchain. The CudaToolChain will actually ignore any command line
				644	// arguments for any of these "supported" sanitizers. That means that no
				645	// sanitization of device code is actually supported at this time.
				646	//
				647	// This behavior is necessary because the host and device toolchains
				648	// invocations often share the command line, so the device toolchain must
				649	// tolerate flags meant only for the host toolchain.
				650	return HostTC.getSupportedSanitizers();
				651	}
				652
				653	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				654	const ArgList &Args) const {
				655	return HostTC.computeMSVCVersion(D, Args);
				656	}