Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: 9568f93b7819472f37147f2618577c8bac2ddd5c [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations -----*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
				11	#include "InputInfo.h"
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	12	#include "CommonArgs.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	14	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	15	#include "clang/Basic/VirtualFileSystem.h"
				16	#include "clang/Driver/Compilation.h"
				17	#include "clang/Driver/Driver.h"
				18	#include "clang/Driver/DriverDiagnostic.h"
				19	#include "clang/Driver/Options.h"
				20	#include "llvm/Option/ArgList.h"
				21	#include "llvm/Support/Path.h"
				22	#include <system_error>
				23
				24	using namespace clang::driver;
				25	using namespace clang::driver::toolchains;
				26	using namespace clang::driver::tools;
				27	using namespace clang;
				28	using namespace llvm::opt;
				29
				30	// Parses the contents of version.txt in an CUDA installation. It should
				31	// contain one line of the from e.g. "CUDA Version 7.5.2".
				32	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				33	if (!V.startswith("CUDA Version "))
				34	return CudaVersion::UNKNOWN;
				35	V = V.substr(strlen("CUDA Version "));
				36	int Major = -1, Minor = -1;
				37	auto First = V.split('.');
				38	auto Second = First.second.split('.');
				39	if (First.first.getAsInteger(10, Major) \|\|
				40	Second.first.getAsInteger(10, Minor))
				41	return CudaVersion::UNKNOWN;
				42
				43	if (Major == 7 && Minor == 0) {
				44	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				45	// CUDA 7 installs I've seen. But no harm in checking.
				46	return CudaVersion::CUDA_70;
				47	}
				48	if (Major == 7 && Minor == 5)
				49	return CudaVersion::CUDA_75;
				50	if (Major == 8 && Minor == 0)
				51	return CudaVersion::CUDA_80;
				52	return CudaVersion::UNKNOWN;
				53	}
				54
				55	CudaInstallationDetector::CudaInstallationDetector(
				56	const Driver &D, const llvm::Triple &HostTriple,
				57	const llvm::opt::ArgList &Args)
				58	: D(D) {
				59	SmallVector<std::string, 4> CudaPathCandidates;
				60
				61	// In decreasing order so we prefer newer versions to older versions.
				62	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				63
				64	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
				65	CudaPathCandidates.push_back(
				66	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
				67	} else if (HostTriple.isOSWindows()) {
				68	for (const char *Ver : Versions)
				69	CudaPathCandidates.push_back(
				70	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				71	Ver);
				72	} else {
				73	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
				74	for (const char *Ver : Versions)
				75	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
				76	}
				77
				78	for (const auto &CudaPath : CudaPathCandidates) {
				79	if (CudaPath.empty() \|\| !D.getVFS().exists(CudaPath))
				80	continue;
				81
				82	InstallPath = CudaPath;
				83	BinPath = CudaPath + "/bin";
				84	IncludePath = InstallPath + "/include";
				85	LibDevicePath = InstallPath + "/nvvm/libdevice";
				86
				87	auto &FS = D.getVFS();
				88	if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
				89	FS.exists(LibDevicePath)))
				90	continue;
				91
				92	// On Linux, we have both lib and lib64 directories, and we need to choose
				93	// based on our triple. On MacOS, we have only a lib directory.
				94	//
				95	// It's sufficient for our purposes to be flexible: If both lib and lib64
				96	// exist, we choose whichever one matches our triple. Otherwise, if only
				97	// lib exists, we use it.
				98	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				99	LibPath = InstallPath + "/lib64";
				100	else if (FS.exists(InstallPath + "/lib"))
				101	LibPath = InstallPath + "/lib";
				102	else
				103	continue;
				104
				105	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				106	FS.getBufferForFile(InstallPath + "/version.txt");
				107	if (!VersionFile) {
				108	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				109	// version.txt isn't present.
				110	Version = CudaVersion::CUDA_70;
				111	} else {
				112	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				113	}
				114
				115	std::error_code EC;
				116	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				117	!EC && LI != LE; LI = LI.increment(EC)) {
				118	StringRef FilePath = LI->path();
				119	StringRef FileName = llvm::sys::path::filename(FilePath);
				120	// Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
				121	const StringRef LibDeviceName = "libdevice.";
				122	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				123	continue;
				124	StringRef GpuArch = FileName.slice(
				125	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				126	LibDeviceMap[GpuArch] = FilePath.str();
				127	// Insert map entries for specifc devices with this compute
				128	// capability. NVCC's choice of the libdevice library version is
				129	// rather peculiar and depends on the CUDA version.
				130	if (GpuArch == "compute_20") {
				131	LibDeviceMap["sm_20"] = FilePath;
				132	LibDeviceMap["sm_21"] = FilePath;
				133	LibDeviceMap["sm_32"] = FilePath;
				134	} else if (GpuArch == "compute_30") {
				135	LibDeviceMap["sm_30"] = FilePath;
				136	if (Version < CudaVersion::CUDA_80) {
				137	LibDeviceMap["sm_50"] = FilePath;
				138	LibDeviceMap["sm_52"] = FilePath;
				139	LibDeviceMap["sm_53"] = FilePath;
				140	}
				141	LibDeviceMap["sm_60"] = FilePath;
				142	LibDeviceMap["sm_61"] = FilePath;
				143	LibDeviceMap["sm_62"] = FilePath;
				144	} else if (GpuArch == "compute_35") {
				145	LibDeviceMap["sm_35"] = FilePath;
				146	LibDeviceMap["sm_37"] = FilePath;
				147	} else if (GpuArch == "compute_50") {
				148	if (Version >= CudaVersion::CUDA_80) {
				149	LibDeviceMap["sm_50"] = FilePath;
				150	LibDeviceMap["sm_52"] = FilePath;
				151	LibDeviceMap["sm_53"] = FilePath;
				152	}
				153	}
				154	}
				155
				156	IsValid = true;
				157	break;
				158	}
				159	}
				160
				161	void CudaInstallationDetector::AddCudaIncludeArgs(
				162	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				163	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				164	// Add cuda_wrappers/* to our system include path. This lets us wrap
				165	// standard library headers.
				166	SmallString<128> P(D.ResourceDir);
				167	llvm::sys::path::append(P, "include");
				168	llvm::sys::path::append(P, "cuda_wrappers");
				169	CC1Args.push_back("-internal-isystem");
				170	CC1Args.push_back(DriverArgs.MakeArgString(P));
				171	}
				172
				173	if (DriverArgs.hasArg(options::OPT_nocudainc))
				174	return;
				175
				176	if (!isValid()) {
				177	D.Diag(diag::err_drv_no_cuda_installation);
				178	return;
				179	}
				180
				181	CC1Args.push_back("-internal-isystem");
				182	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				183	CC1Args.push_back("-include");
				184	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				185	}
				186
				187	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				188	CudaArch Arch) const {
				189	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
				190	ArchsWithVersionTooLowErrors.count(Arch) > 0)
				191	return;
				192
				193	auto RequiredVersion = MinVersionForCudaArch(Arch);
				194	if (Version < RequiredVersion) {
				195	ArchsWithVersionTooLowErrors.insert(Arch);
				196	D.Diag(diag::err_drv_cuda_version_too_low)
				197	<< InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
				198	<< CudaVersionToString(RequiredVersion);
				199	}
				200	}
				201
				202	void CudaInstallationDetector::print(raw_ostream &OS) const {
				203	if (isValid())
				204	OS << "Found CUDA installation: " << InstallPath << ", version "
				205	<< CudaVersionToString(Version) << "\n";
				206	}
				207
				208	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				209	const InputInfo &Output,
				210	const InputInfoList &Inputs,
				211	const ArgList &Args,
				212	const char *LinkingOutput) const {
				213	const auto &TC =
				214	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				215	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				216
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	217	StringRef GPUArchName;
				218	// If this is an OpenMP action we need to extract the device architecture
				219	// from the -march=arch option. This option may come from -Xopenmp-target
				220	// flag or the default value.
				221	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				222	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				223	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				224	} else
				225	GPUArchName = JA.getOffloadingArch();
				226
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	227	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	228	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	229	assert(gpu_arch != CudaArch::UNKNOWN &&
				230	"Device action expected to have an architecture.");
				231
				232	// Check that our installation's ptxas supports gpu_arch.
				233	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				234	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				235	}
				236
				237	ArgStringList CmdArgs;
				238	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				239	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				240	options::OPT_no_cuda_noopt_device_debug, false)) {
				241	// ptxas does not accept -g option if optimization is enabled, so
				242	// we ignore the compiler's -O* options if we want debug info.
				243	CmdArgs.push_back("-g");
				244	CmdArgs.push_back("--dont-merge-basicblocks");
				245	CmdArgs.push_back("--return-at-end");
				246	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				247	// Map the -O we received to -O{0,1,2,3}.
				248	//
				249	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				250	// default, so it may correspond more closely to the spirit of clang -O2.
				251
				252	// -O3 seems like the least-bad option when -Osomething is specified to
				253	// clang but it isn't handled below.
				254	StringRef OOpt = "3";
				255	if (A->getOption().matches(options::OPT_O4) \|\|
				256	A->getOption().matches(options::OPT_Ofast))
				257	OOpt = "3";
				258	else if (A->getOption().matches(options::OPT_O0))
				259	OOpt = "0";
				260	else if (A->getOption().matches(options::OPT_O)) {
				261	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				262	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				263	.Case("1", "1")
				264	.Case("2", "2")
				265	.Case("3", "3")
				266	.Case("s", "2")
				267	.Case("z", "2")
				268	.Default("2");
				269	}
				270	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				271	} else {
				272	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				273	// to no optimizations, but ptxas's default is -O3.
				274	CmdArgs.push_back("-O0");
				275	}
				276
				277	CmdArgs.push_back("--gpu-name");
				278	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				279	CmdArgs.push_back("--output-file");
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	280	SmallString<256> OutputFileName(Output.getFilename());
				281	if (JA.isOffloading(Action::OFK_OpenMP))
				282	llvm::sys::path::replace_extension(OutputFileName, "cubin");
				283	CmdArgs.push_back(Args.MakeArgString(OutputFileName));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	284	for (const auto& II : Inputs)
				285	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				286
				287	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				288	CmdArgs.push_back(Args.MakeArgString(A));
				289
				290	const char *Exec;
				291	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				292	Exec = A->getValue();
				293	else
				294	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				295	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				296	}
				297
				298	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				299	// at the Inputs' Actions in order to figure out which GPU architecture they
				300	// correspond to.
				301	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				302	const InputInfo &Output,
				303	const InputInfoList &Inputs,
				304	const ArgList &Args,
				305	const char *LinkingOutput) const {
				306	const auto &TC =
				307	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				308	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				309
				310	ArgStringList CmdArgs;
				311	CmdArgs.push_back("--cuda");
				312	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				313	CmdArgs.push_back(Args.MakeArgString("--create"));
				314	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				315
				316	for (const auto& II : Inputs) {
				317	auto *A = II.getAction();
				318	assert(A->getInputs().size() == 1 &&
				319	"Device offload action is expected to have a single input");
				320	const char *gpu_arch_str = A->getOffloadingArch();
				321	assert(gpu_arch_str &&
				322	"Device action expected to have associated a GPU architecture!");
				323	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				324
				325	// We need to pass an Arch of the form "sm_XX" for cubin files and
				326	// "compute_XX" for ptx.
				327	const char *Arch =
				328	(II.getType() == types::TY_PP_Asm)
				329	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				330	: gpu_arch_str;
				331	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				332	Arch + ",file=" + II.getFilename()));
				333	}
				334
				335	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				336	CmdArgs.push_back(Args.MakeArgString(A));
				337
				338	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				339	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				340	}
				341
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	342	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				343	const InputInfo &Output,
				344	const InputInfoList &Inputs,
				345	const ArgList &Args,
				346	const char *LinkingOutput) const {
				347	const auto &TC =
				348	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				349	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				350
				351	ArgStringList CmdArgs;
				352
				353	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				354	// host binary by the host linker.
				355	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				356	"CUDA toolchain not expected for an OpenMP host device.");
				357
				358	if (Output.isFilename()) {
				359	CmdArgs.push_back("-o");
				360	CmdArgs.push_back(Output.getFilename());
				361	} else
				362	assert(Output.isNothing() && "Invalid output.");
				363	if (Args.hasArg(options::OPT_g_Flag))
				364	CmdArgs.push_back("-g");
				365
				366	if (Args.hasArg(options::OPT_v))
				367	CmdArgs.push_back("-v");
				368
				369	StringRef GPUArch =
				370	Args.getLastArgValue(options::OPT_march_EQ);
				371	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				372
				373	CmdArgs.push_back("-arch");
				374	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				375
				376	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				377	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				378
				379	// Add paths for the default clang library path.
				380	SmallString<256> DefaultLibPath =
				381	llvm::sys::path::parent_path(TC.getDriver().Dir);
				382	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				383	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				384
				385	// Add linking against library implementing OpenMP calls on NVPTX target.
				386	CmdArgs.push_back("-lomptarget-nvptx");
				387
				388	for (const auto &II : Inputs) {
				389	if (II.getType() == types::TY_LLVM_IR \|\|
				390	II.getType() == types::TY_LTO_IR \|\|
				391	II.getType() == types::TY_LTO_BC \|\|
				392	II.getType() == types::TY_LLVM_BC) {
				393	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				394	<< getToolChain().getTripleString();
				395	continue;
				396	}
				397
				398	// Currently, we only pass the input files to the linker, we do not pass
				399	// any libraries that may be valid only for the host.
				400	if (!II.isFilename())
				401	continue;
				402
				403	SmallString<256> Name = llvm::sys::path::filename(II.getFilename());
				404	llvm::sys::path::replace_extension(Name, "cubin");
				405
				406	const char *CubinF =
				407	C.addTempFile(C.getArgs().MakeArgString(Name));
				408
				409	CmdArgs.push_back(CubinF);
				410	}
				411
				412	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				413
				414	const char *Exec =
				415	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				416	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				417	}
				418
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	419	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				420	/// which isn't properly a linker but nonetheless performs the step of stitching
				421	/// together object files from the assembler into a single blob.
				422
				423	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	424	const ToolChain &HostTC, const ArgList &Args,
				425	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	426	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	427	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	428	if (CudaInstallation.isValid())
				429	getProgramPaths().push_back(CudaInstallation.getBinPath());
				430	}
				431
				432	void CudaToolChain::addClangTargetOptions(
				433	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	434	llvm::opt::ArgStringList &CC1Args,
				435	Action::OffloadKind DeviceOffloadingKind) const {
				436	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	437
				438	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				439	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	440	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				441	DeviceOffloadingKind == Action::OFK_Cuda) &&
				442	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				443
				444	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				445	CC1Args.push_back("-fcuda-is-device");
				446
				447	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				448	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				449	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				450
				451	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				452	options::OPT_fno_cuda_approx_transcendentals, false))
				453	CC1Args.push_back("-fcuda-approx-transcendentals");
				454
				455	if (DriverArgs.hasArg(options::OPT_nocudalib))
				456	return;
				457	}
				458
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	459	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				460
				461	if (LibDeviceFile.empty()) {
				462	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				463	return;
				464	}
				465
				466	CC1Args.push_back("-mlink-cuda-bitcode");
				467	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				468
				469	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				470	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				471	// came with CUDA-7.0.
				472	CC1Args.push_back("-target-feature");
				473	CC1Args.push_back("+ptx42");
				474	}
				475
				476	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				477	ArgStringList &CC1Args) const {
				478	// Check our CUDA version if we're going to include the CUDA headers.
				479	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				480	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				481	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				482	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				483	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				484	}
				485	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				486	}
				487
				488	llvm::opt::DerivedArgList *
				489	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				490	StringRef BoundArch,
				491	Action::OffloadKind DeviceOffloadKind) const {
				492	DerivedArgList *DAL =
				493	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				494	if (!DAL)
				495	DAL = new DerivedArgList(Args.getBaseArgs());
				496
				497	const OptTable &Opts = getDriver().getOpts();
				498
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	499	// For OpenMP device offloading, append derived arguments. Make sure
				500	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	501	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	502	if (DeviceOffloadKind == Action::OFK_OpenMP) {
				503	for (Arg *A : Args){
				504	bool IsDuplicate = false;
				505	for (Arg DALArg : DAL){
				506	if (A == DALArg) {
				507	IsDuplicate = true;
				508	break;
				509	}
				510	}
				511	if (!IsDuplicate)
				512	DAL->append(A);
				513	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	514
				515	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
				516	if (Arch.empty())
				517	// Default compute capability for CUDA toolchain is sm_20.
				518	DAL->AddJoinedArg(nullptr,
				519	Opts.getOption(options::OPT_march_EQ), "sm_20");
				520
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	521	return DAL;
				522	}
				523
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	524	for (Arg *A : Args) {
				525	if (A->getOption().matches(options::OPT_Xarch__)) {
				526	// Skip this argument unless the architecture matches BoundArch
				527	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				528	continue;
				529
				530	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				531	unsigned Prev = Index;
				532	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				533
				534	// If the argument parsing failed or more than one argument was
				535	// consumed, the -Xarch_ argument's parameter tried to consume
				536	// extra arguments. Emit an error and ignore.
				537	//
				538	// We also want to disallow any options which would alter the
				539	// driver behavior; that isn't going to work in our model. We
				540	// use isDriverOption() as an approximation, although things
				541	// like -O4 are going to slip through.
				542	if (!XarchArg \|\| Index > Prev + 1) {
				543	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				544	<< A->getAsString(Args);
				545	continue;
				546	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				547	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				548	<< A->getAsString(Args);
				549	continue;
				550	}
				551	XarchArg->setBaseArg(A);
				552	A = XarchArg.release();
				553	DAL->AddSynthesizedArg(A);
				554	}
				555	DAL->append(A);
				556	}
				557
				558	if (!BoundArch.empty()) {
				559	DAL->eraseArg(options::OPT_march_EQ);
				560	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				561	}
				562	return DAL;
				563	}
				564
				565	Tool *CudaToolChain::buildAssembler() const {
				566	return new tools::NVPTX::Assembler(*this);
				567	}
				568
				569	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	4cdba82	2017-08-07 20:01:48 +0000	[diff] [blame^]	570	if (OK == Action::OFK_OpenMP)
				571	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	572	return new tools::NVPTX::Linker(*this);
				573	}
				574
				575	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				576	HostTC.addClangWarningOptions(CC1Args);
				577	}
				578
				579	ToolChain::CXXStdlibType
				580	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				581	return HostTC.GetCXXStdlibType(Args);
				582	}
				583
				584	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				585	ArgStringList &CC1Args) const {
				586	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				587	}
				588
				589	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				590	ArgStringList &CC1Args) const {
				591	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				592	}
				593
				594	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				595	ArgStringList &CC1Args) const {
				596	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				597	}
				598
				599	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				600	// The CudaToolChain only supports sanitizers in the sense that it allows
				601	// sanitizer arguments on the command line if they are supported by the host
				602	// toolchain. The CudaToolChain will actually ignore any command line
				603	// arguments for any of these "supported" sanitizers. That means that no
				604	// sanitization of device code is actually supported at this time.
				605	//
				606	// This behavior is necessary because the host and device toolchains
				607	// invocations often share the command line, so the device toolchain must
				608	// tolerate flags meant only for the host toolchain.
				609	return HostTC.getSupportedSanitizers();
				610	}
				611
				612	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				613	const ArgList &Args) const {
				614	return HostTC.computeMSVCVersion(D, Args);
				615	}