Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: a0b348ffc4a7c482169d51f33f52e55e1cd5f120 [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ----*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
				11	#include "InputInfo.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	12	#include "CommonArgs.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	14	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	15	#include "clang/Basic/VirtualFileSystem.h"
				16	#include "clang/Driver/Compilation.h"
				17	#include "clang/Driver/Driver.h"
				18	#include "clang/Driver/DriverDiagnostic.h"
				19	#include "clang/Driver/Options.h"
				20	#include "llvm/Option/ArgList.h"
				21	#include "llvm/Support/Path.h"
				22	#include <system_error>
				23
				24	using namespace clang::driver;
				25	using namespace clang::driver::toolchains;
				26	using namespace clang::driver::tools;
				27	using namespace clang;
				28	using namespace llvm::opt;
				29
				30	// Parses the contents of version.txt in an CUDA installation. It should
				31	// contain one line of the from e.g. "CUDA Version 7.5.2".
				32	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				33	if (!V.startswith("CUDA Version "))
				34	return CudaVersion::UNKNOWN;
				35	V = V.substr(strlen("CUDA Version "));
				36	int Major = -1, Minor = -1;
				37	auto First = V.split('.');
				38	auto Second = First.second.split('.');
				39	if (First.first.getAsInteger(10, Major) \|\|
				40	Second.first.getAsInteger(10, Minor))
				41	return CudaVersion::UNKNOWN;
				42
				43	if (Major == 7 && Minor == 0) {
				44	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				45	// CUDA 7 installs I've seen. But no harm in checking.
				46	return CudaVersion::CUDA_70;
				47	}
				48	if (Major == 7 && Minor == 5)
				49	return CudaVersion::CUDA_75;
				50	if (Major == 8 && Minor == 0)
				51	return CudaVersion::CUDA_80;
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	52	if (Major == 9 && Minor == 0)
				53	return CudaVersion::CUDA_90;
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	54	return CudaVersion::UNKNOWN;
				55	}
				56
				57	CudaInstallationDetector::CudaInstallationDetector(
				58	const Driver &D, const llvm::Triple &HostTriple,
				59	const llvm::opt::ArgList &Args)
				60	: D(D) {
				61	SmallVector<std::string, 4> CudaPathCandidates;
				62
				63	// In decreasing order so we prefer newer versions to older versions.
				64	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				65
				66	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
				67	CudaPathCandidates.push_back(
				68	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
				69	} else if (HostTriple.isOSWindows()) {
				70	for (const char *Ver : Versions)
				71	CudaPathCandidates.push_back(
				72	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				73	Ver);
				74	} else {
				75	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
				76	for (const char *Ver : Versions)
				77	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
				78	}
				79
				80	for (const auto &CudaPath : CudaPathCandidates) {
				81	if (CudaPath.empty() \|\| !D.getVFS().exists(CudaPath))
				82	continue;
				83
				84	InstallPath = CudaPath;
				85	BinPath = CudaPath + "/bin";
				86	IncludePath = InstallPath + "/include";
				87	LibDevicePath = InstallPath + "/nvvm/libdevice";
				88
				89	auto &FS = D.getVFS();
				90	if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
				91	FS.exists(LibDevicePath)))
				92	continue;
				93
				94	// On Linux, we have both lib and lib64 directories, and we need to choose
				95	// based on our triple. On MacOS, we have only a lib directory.
				96	//
				97	// It's sufficient for our purposes to be flexible: If both lib and lib64
				98	// exist, we choose whichever one matches our triple. Otherwise, if only
				99	// lib exists, we use it.
				100	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				101	LibPath = InstallPath + "/lib64";
				102	else if (FS.exists(InstallPath + "/lib"))
				103	LibPath = InstallPath + "/lib";
				104	else
				105	continue;
				106
				107	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				108	FS.getBufferForFile(InstallPath + "/version.txt");
				109	if (!VersionFile) {
				110	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				111	// version.txt isn't present.
				112	Version = CudaVersion::CUDA_70;
				113	} else {
				114	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				115	}
				116
Artem Belevich	8af4e23	2017-09-07 18:14:32 +0000	[diff] [blame]	117	if (Version == CudaVersion::CUDA_90) {
				118	// CUDA-9 uses single libdevice file for all GPU variants.
				119	std::string FilePath = LibDevicePath + "/libdevice.10.bc";
				120	if (FS.exists(FilePath)) {
				121	for (const char *GpuArch :
				122	{"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
				123	"sm_60", "sm_61", "sm_62", "sm_70"})
				124	LibDeviceMap[GpuArch] = FilePath;
				125	}
				126	} else {
				127	std::error_code EC;
				128	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				129	!EC && LI != LE; LI = LI.increment(EC)) {
				130	StringRef FilePath = LI->path();
				131	StringRef FileName = llvm::sys::path::filename(FilePath);
				132	// Process all bitcode filenames that look like
				133	// libdevice.compute_XX.YY.bc
				134	const StringRef LibDeviceName = "libdevice.";
				135	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				136	continue;
				137	StringRef GpuArch = FileName.slice(
				138	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				139	LibDeviceMap[GpuArch] = FilePath.str();
				140	// Insert map entries for specifc devices with this compute
				141	// capability. NVCC's choice of the libdevice library version is
				142	// rather peculiar and depends on the CUDA version.
				143	if (GpuArch == "compute_20") {
				144	LibDeviceMap["sm_20"] = FilePath;
				145	LibDeviceMap["sm_21"] = FilePath;
				146	LibDeviceMap["sm_32"] = FilePath;
				147	} else if (GpuArch == "compute_30") {
				148	LibDeviceMap["sm_30"] = FilePath;
				149	if (Version < CudaVersion::CUDA_80) {
				150	LibDeviceMap["sm_50"] = FilePath;
				151	LibDeviceMap["sm_52"] = FilePath;
				152	LibDeviceMap["sm_53"] = FilePath;
				153	}
				154	LibDeviceMap["sm_60"] = FilePath;
				155	LibDeviceMap["sm_61"] = FilePath;
				156	LibDeviceMap["sm_62"] = FilePath;
				157	} else if (GpuArch == "compute_35") {
				158	LibDeviceMap["sm_35"] = FilePath;
				159	LibDeviceMap["sm_37"] = FilePath;
				160	} else if (GpuArch == "compute_50") {
				161	if (Version >= CudaVersion::CUDA_80) {
				162	LibDeviceMap["sm_50"] = FilePath;
				163	LibDeviceMap["sm_52"] = FilePath;
				164	LibDeviceMap["sm_53"] = FilePath;
				165	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	166	}
				167	}
				168	}
				169
Gheorghe-Teodor Bercea	9c52574	2017-08-11 15:46:22 +0000	[diff] [blame]	170	// This code prevents IsValid from being set when
				171	// no libdevice has been found.
				172	bool allEmpty = true;
				173	std::string LibDeviceFile;
				174	for (auto key : LibDeviceMap.keys()) {
				175	LibDeviceFile = LibDeviceMap.lookup(key);
				176	if (!LibDeviceFile.empty())
				177	allEmpty = false;
				178	}
				179
				180	if (allEmpty)
				181	continue;
				182
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	183	IsValid = true;
				184	break;
				185	}
				186	}
				187
				188	void CudaInstallationDetector::AddCudaIncludeArgs(
				189	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				190	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				191	// Add cuda_wrappers/* to our system include path. This lets us wrap
				192	// standard library headers.
				193	SmallString<128> P(D.ResourceDir);
				194	llvm::sys::path::append(P, "include");
				195	llvm::sys::path::append(P, "cuda_wrappers");
				196	CC1Args.push_back("-internal-isystem");
				197	CC1Args.push_back(DriverArgs.MakeArgString(P));
				198	}
				199
				200	if (DriverArgs.hasArg(options::OPT_nocudainc))
				201	return;
				202
				203	if (!isValid()) {
				204	D.Diag(diag::err_drv_no_cuda_installation);
				205	return;
				206	}
				207
				208	CC1Args.push_back("-internal-isystem");
				209	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				210	CC1Args.push_back("-include");
				211	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				212	}
				213
				214	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				215	CudaArch Arch) const {
				216	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
				217	ArchsWithVersionTooLowErrors.count(Arch) > 0)
				218	return;
				219
				220	auto RequiredVersion = MinVersionForCudaArch(Arch);
				221	if (Version < RequiredVersion) {
				222	ArchsWithVersionTooLowErrors.insert(Arch);
				223	D.Diag(diag::err_drv_cuda_version_too_low)
				224	<< InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
				225	<< CudaVersionToString(RequiredVersion);
				226	}
				227	}
				228
				229	void CudaInstallationDetector::print(raw_ostream &OS) const {
				230	if (isValid())
				231	OS << "Found CUDA installation: " << InstallPath << ", version "
				232	<< CudaVersionToString(Version) << "\n";
				233	}
				234
				235	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				236	const InputInfo &Output,
				237	const InputInfoList &Inputs,
				238	const ArgList &Args,
				239	const char *LinkingOutput) const {
				240	const auto &TC =
				241	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				242	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				243
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	244	StringRef GPUArchName;
				245	// If this is an OpenMP action we need to extract the device architecture
				246	// from the -march=arch option. This option may come from -Xopenmp-target
				247	// flag or the default value.
				248	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				249	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				250	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				251	} else
				252	GPUArchName = JA.getOffloadingArch();
				253
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	254	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	255	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	256	assert(gpu_arch != CudaArch::UNKNOWN &&
				257	"Device action expected to have an architecture.");
				258
				259	// Check that our installation's ptxas supports gpu_arch.
				260	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				261	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				262	}
				263
				264	ArgStringList CmdArgs;
				265	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				266	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				267	options::OPT_no_cuda_noopt_device_debug, false)) {
				268	// ptxas does not accept -g option if optimization is enabled, so
				269	// we ignore the compiler's -O* options if we want debug info.
				270	CmdArgs.push_back("-g");
				271	CmdArgs.push_back("--dont-merge-basicblocks");
				272	CmdArgs.push_back("--return-at-end");
				273	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				274	// Map the -O we received to -O{0,1,2,3}.
				275	//
				276	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				277	// default, so it may correspond more closely to the spirit of clang -O2.
				278
				279	// -O3 seems like the least-bad option when -Osomething is specified to
				280	// clang but it isn't handled below.
				281	StringRef OOpt = "3";
				282	if (A->getOption().matches(options::OPT_O4) \|\|
				283	A->getOption().matches(options::OPT_Ofast))
				284	OOpt = "3";
				285	else if (A->getOption().matches(options::OPT_O0))
				286	OOpt = "0";
				287	else if (A->getOption().matches(options::OPT_O)) {
				288	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				289	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				290	.Case("1", "1")
				291	.Case("2", "2")
				292	.Case("3", "3")
				293	.Case("s", "2")
				294	.Case("z", "2")
				295	.Default("2");
				296	}
				297	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				298	} else {
				299	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				300	// to no optimizations, but ptxas's default is -O3.
				301	CmdArgs.push_back("-O0");
				302	}
				303
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	304	// Pass -v to ptxas if it was passed to the driver.
				305	if (Args.hasArg(options::OPT_v))
				306	CmdArgs.push_back("-v");
				307
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	308	CmdArgs.push_back("--gpu-name");
				309	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				310	CmdArgs.push_back("--output-file");
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	311	SmallString<256> OutputFileName(Output.getFilename());
				312	if (JA.isOffloading(Action::OFK_OpenMP))
				313	llvm::sys::path::replace_extension(OutputFileName, "cubin");
				314	CmdArgs.push_back(Args.MakeArgString(OutputFileName));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	315	for (const auto& II : Inputs)
				316	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				317
				318	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				319	CmdArgs.push_back(Args.MakeArgString(A));
				320
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	321	// In OpenMP we need to generate relocatable code.
Gheorghe-Teodor Bercea	0846582	2017-08-09 15:27:39 +0000	[diff] [blame]	322	if (JA.isOffloading(Action::OFK_OpenMP) &&
				323	Args.hasFlag(options::OPT_fopenmp_relocatable_target,
				324	options::OPT_fnoopenmp_relocatable_target,
				325	/Default=/ true))
Gheorghe-Teodor Bercea	b9d1172	2017-08-09 14:59:35 +0000	[diff] [blame]	326	CmdArgs.push_back("-c");
				327
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	328	const char *Exec;
				329	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				330	Exec = A->getValue();
				331	else
				332	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				333	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				334	}
				335
				336	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				337	// at the Inputs' Actions in order to figure out which GPU architecture they
				338	// correspond to.
				339	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				340	const InputInfo &Output,
				341	const InputInfoList &Inputs,
				342	const ArgList &Args,
				343	const char *LinkingOutput) const {
				344	const auto &TC =
				345	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				346	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				347
				348	ArgStringList CmdArgs;
				349	CmdArgs.push_back("--cuda");
				350	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				351	CmdArgs.push_back(Args.MakeArgString("--create"));
				352	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				353
				354	for (const auto& II : Inputs) {
				355	auto *A = II.getAction();
				356	assert(A->getInputs().size() == 1 &&
				357	"Device offload action is expected to have a single input");
				358	const char *gpu_arch_str = A->getOffloadingArch();
				359	assert(gpu_arch_str &&
				360	"Device action expected to have associated a GPU architecture!");
				361	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				362
				363	// We need to pass an Arch of the form "sm_XX" for cubin files and
				364	// "compute_XX" for ptx.
				365	const char *Arch =
				366	(II.getType() == types::TY_PP_Asm)
				367	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				368	: gpu_arch_str;
				369	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				370	Arch + ",file=" + II.getFilename()));
				371	}
				372
				373	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				374	CmdArgs.push_back(Args.MakeArgString(A));
				375
				376	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				377	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				378	}
				379
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	380	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				381	const InputInfo &Output,
				382	const InputInfoList &Inputs,
				383	const ArgList &Args,
				384	const char *LinkingOutput) const {
				385	const auto &TC =
				386	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				387	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				388
				389	ArgStringList CmdArgs;
				390
				391	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				392	// host binary by the host linker.
				393	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				394	"CUDA toolchain not expected for an OpenMP host device.");
				395
				396	if (Output.isFilename()) {
				397	CmdArgs.push_back("-o");
				398	CmdArgs.push_back(Output.getFilename());
				399	} else
				400	assert(Output.isNothing() && "Invalid output.");
				401	if (Args.hasArg(options::OPT_g_Flag))
				402	CmdArgs.push_back("-g");
				403
				404	if (Args.hasArg(options::OPT_v))
				405	CmdArgs.push_back("-v");
				406
				407	StringRef GPUArch =
				408	Args.getLastArgValue(options::OPT_march_EQ);
				409	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				410
				411	CmdArgs.push_back("-arch");
				412	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				413
				414	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				415	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				416
				417	// Add paths for the default clang library path.
				418	SmallString<256> DefaultLibPath =
				419	llvm::sys::path::parent_path(TC.getDriver().Dir);
				420	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				421	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				422
				423	// Add linking against library implementing OpenMP calls on NVPTX target.
				424	CmdArgs.push_back("-lomptarget-nvptx");
				425
				426	for (const auto &II : Inputs) {
				427	if (II.getType() == types::TY_LLVM_IR \|\|
				428	II.getType() == types::TY_LTO_IR \|\|
				429	II.getType() == types::TY_LTO_BC \|\|
				430	II.getType() == types::TY_LLVM_BC) {
				431	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				432	<< getToolChain().getTripleString();
				433	continue;
				434	}
				435
				436	// Currently, we only pass the input files to the linker, we do not pass
				437	// any libraries that may be valid only for the host.
				438	if (!II.isFilename())
				439	continue;
				440
				441	SmallString<256> Name = llvm::sys::path::filename(II.getFilename());
				442	llvm::sys::path::replace_extension(Name, "cubin");
				443
				444	const char *CubinF =
				445	C.addTempFile(C.getArgs().MakeArgString(Name));
				446
				447	CmdArgs.push_back(CubinF);
				448	}
				449
				450	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				451
				452	const char *Exec =
				453	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				454	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				455	}
				456
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	457	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				458	/// which isn't properly a linker but nonetheless performs the step of stitching
				459	/// together object files from the assembler into a single blob.
				460
				461	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	462	const ToolChain &HostTC, const ArgList &Args,
				463	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	464	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	465	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	466	if (CudaInstallation.isValid())
				467	getProgramPaths().push_back(CudaInstallation.getBinPath());
Gheorghe-Teodor Bercea	690f6f9	2017-08-09 19:52:28 +0000	[diff] [blame]	468	// Lookup binaries into the driver directory, this is used to
				469	// discover the clang-offload-bundler executable.
				470	getProgramPaths().push_back(getDriver().Dir);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	471	}
				472
				473	void CudaToolChain::addClangTargetOptions(
				474	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	475	llvm::opt::ArgStringList &CC1Args,
				476	Action::OffloadKind DeviceOffloadingKind) const {
				477	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	478
				479	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				480	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	481	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				482	DeviceOffloadingKind == Action::OFK_Cuda) &&
				483	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				484
				485	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				486	CC1Args.push_back("-fcuda-is-device");
				487
				488	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				489	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				490	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				491
				492	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				493	options::OPT_fno_cuda_approx_transcendentals, false))
				494	CC1Args.push_back("-fcuda-approx-transcendentals");
				495
				496	if (DriverArgs.hasArg(options::OPT_nocudalib))
				497	return;
				498	}
				499
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	500	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				501
				502	if (LibDeviceFile.empty()) {
				503	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				504	return;
				505	}
				506
				507	CC1Args.push_back("-mlink-cuda-bitcode");
				508	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				509
Artem Belevich	4654dc8	2017-09-20 21:23:07 +0000	[diff] [blame^]	510	if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
				511	// CUDA-9 uses new instructions that are only available in PTX6.0
				512	CC1Args.push_back("-target-feature");
				513	CC1Args.push_back("+ptx60");
				514	} else {
				515	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				516	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				517	// came with CUDA-7.0.
				518	CC1Args.push_back("-target-feature");
				519	CC1Args.push_back("+ptx42");
				520	}
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	521	}
				522
				523	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				524	ArgStringList &CC1Args) const {
				525	// Check our CUDA version if we're going to include the CUDA headers.
				526	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				527	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				528	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				529	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				530	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				531	}
				532	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				533	}
				534
				535	llvm::opt::DerivedArgList *
				536	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				537	StringRef BoundArch,
				538	Action::OffloadKind DeviceOffloadKind) const {
				539	DerivedArgList *DAL =
				540	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				541	if (!DAL)
				542	DAL = new DerivedArgList(Args.getBaseArgs());
				543
				544	const OptTable &Opts = getDriver().getOpts();
				545
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	546	// For OpenMP device offloading, append derived arguments. Make sure
				547	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	548	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	549	if (DeviceOffloadKind == Action::OFK_OpenMP) {
				550	for (Arg *A : Args){
				551	bool IsDuplicate = false;
				552	for (Arg DALArg : DAL){
				553	if (A == DALArg) {
				554	IsDuplicate = true;
				555	break;
				556	}
				557	}
				558	if (!IsDuplicate)
				559	DAL->append(A);
				560	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	561
				562	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
Gheorghe-Teodor Bercea	14528c6	2017-08-10 16:56:59 +0000	[diff] [blame]	563	if (Arch.empty()) {
				564	// Default compute capability for CUDA toolchain is the
				565	// lowest compute capability supported by the installed
				566	// CUDA version.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	567	DAL->AddJoinedArg(nullptr,
Gheorghe-Teodor Bercea	14528c6	2017-08-10 16:56:59 +0000	[diff] [blame]	568	Opts.getOption(options::OPT_march_EQ),
				569	CudaInstallation.getLowestExistingArch());
				570	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	571
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	572	return DAL;
				573	}
				574
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	575	for (Arg *A : Args) {
				576	if (A->getOption().matches(options::OPT_Xarch__)) {
				577	// Skip this argument unless the architecture matches BoundArch
				578	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				579	continue;
				580
				581	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				582	unsigned Prev = Index;
				583	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				584
				585	// If the argument parsing failed or more than one argument was
				586	// consumed, the -Xarch_ argument's parameter tried to consume
				587	// extra arguments. Emit an error and ignore.
				588	//
				589	// We also want to disallow any options which would alter the
				590	// driver behavior; that isn't going to work in our model. We
				591	// use isDriverOption() as an approximation, although things
				592	// like -O4 are going to slip through.
				593	if (!XarchArg \|\| Index > Prev + 1) {
				594	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				595	<< A->getAsString(Args);
				596	continue;
				597	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				598	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				599	<< A->getAsString(Args);
				600	continue;
				601	}
				602	XarchArg->setBaseArg(A);
				603	A = XarchArg.release();
				604	DAL->AddSynthesizedArg(A);
				605	}
				606	DAL->append(A);
				607	}
				608
				609	if (!BoundArch.empty()) {
				610	DAL->eraseArg(options::OPT_march_EQ);
				611	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				612	}
				613	return DAL;
				614	}
				615
				616	Tool *CudaToolChain::buildAssembler() const {
				617	return new tools::NVPTX::Assembler(*this);
				618	}
				619
				620	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame]	621	if (OK == Action::OFK_OpenMP)
				622	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	623	return new tools::NVPTX::Linker(*this);
				624	}
				625
				626	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				627	HostTC.addClangWarningOptions(CC1Args);
				628	}
				629
				630	ToolChain::CXXStdlibType
				631	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				632	return HostTC.GetCXXStdlibType(Args);
				633	}
				634
				635	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				636	ArgStringList &CC1Args) const {
				637	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				638	}
				639
				640	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				641	ArgStringList &CC1Args) const {
				642	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				643	}
				644
				645	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				646	ArgStringList &CC1Args) const {
				647	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				648	}
				649
				650	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				651	// The CudaToolChain only supports sanitizers in the sense that it allows
				652	// sanitizer arguments on the command line if they are supported by the host
				653	// toolchain. The CudaToolChain will actually ignore any command line
				654	// arguments for any of these "supported" sanitizers. That means that no
				655	// sanitization of device code is actually supported at this time.
				656	//
				657	// This behavior is necessary because the host and device toolchains
				658	// invocations often share the command line, so the device toolchain must
				659	// tolerate flags meant only for the host toolchain.
				660	return HostTC.getSupportedSanitizers();
				661	}
				662
				663	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				664	const ArgList &Args) const {
				665	return HostTC.computeMSVCVersion(D, Args);
				666	}