Blame - clang/lib/Driver/ToolChains/Cuda.cpp - toolchain/llvm-project

blob: 70f472fb025f3f9fa6a06a963ddf7c4471f8c37f [file] [log] [blame]

David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	1	//===--- Cuda.cpp - Cuda Tool and ToolChain Implementations ------ C++ --===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "Cuda.h"
				11	#include "InputInfo.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	12	#include "CommonArgs.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	13	#include "clang/Basic/Cuda.h"
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	14	#include "clang/Config/config.h"
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	15	#include "clang/Basic/VirtualFileSystem.h"
				16	#include "clang/Driver/Compilation.h"
				17	#include "clang/Driver/Driver.h"
				18	#include "clang/Driver/DriverDiagnostic.h"
				19	#include "clang/Driver/Options.h"
				20	#include "llvm/Option/ArgList.h"
				21	#include "llvm/Support/Path.h"
				22	#include <system_error>
				23
				24	using namespace clang::driver;
				25	using namespace clang::driver::toolchains;
				26	using namespace clang::driver::tools;
				27	using namespace clang;
				28	using namespace llvm::opt;
				29
				30	// Parses the contents of version.txt in an CUDA installation. It should
				31	// contain one line of the from e.g. "CUDA Version 7.5.2".
				32	static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
				33	if (!V.startswith("CUDA Version "))
				34	return CudaVersion::UNKNOWN;
				35	V = V.substr(strlen("CUDA Version "));
				36	int Major = -1, Minor = -1;
				37	auto First = V.split('.');
				38	auto Second = First.second.split('.');
				39	if (First.first.getAsInteger(10, Major) \|\|
				40	Second.first.getAsInteger(10, Minor))
				41	return CudaVersion::UNKNOWN;
				42
				43	if (Major == 7 && Minor == 0) {
				44	// This doesn't appear to ever happen -- version.txt doesn't exist in the
				45	// CUDA 7 installs I've seen. But no harm in checking.
				46	return CudaVersion::CUDA_70;
				47	}
				48	if (Major == 7 && Minor == 5)
				49	return CudaVersion::CUDA_75;
				50	if (Major == 8 && Minor == 0)
				51	return CudaVersion::CUDA_80;
				52	return CudaVersion::UNKNOWN;
				53	}
				54
				55	CudaInstallationDetector::CudaInstallationDetector(
				56	const Driver &D, const llvm::Triple &HostTriple,
				57	const llvm::opt::ArgList &Args)
				58	: D(D) {
				59	SmallVector<std::string, 4> CudaPathCandidates;
				60
				61	// In decreasing order so we prefer newer versions to older versions.
				62	std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
				63
				64	if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
				65	CudaPathCandidates.push_back(
				66	Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
				67	} else if (HostTriple.isOSWindows()) {
				68	for (const char *Ver : Versions)
				69	CudaPathCandidates.push_back(
				70	D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
				71	Ver);
				72	} else {
				73	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
				74	for (const char *Ver : Versions)
				75	CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
				76	}
				77
				78	for (const auto &CudaPath : CudaPathCandidates) {
				79	if (CudaPath.empty() \|\| !D.getVFS().exists(CudaPath))
				80	continue;
				81
				82	InstallPath = CudaPath;
				83	BinPath = CudaPath + "/bin";
				84	IncludePath = InstallPath + "/include";
				85	LibDevicePath = InstallPath + "/nvvm/libdevice";
				86
				87	auto &FS = D.getVFS();
				88	if (!(FS.exists(IncludePath) && FS.exists(BinPath) &&
				89	FS.exists(LibDevicePath)))
				90	continue;
				91
				92	// On Linux, we have both lib and lib64 directories, and we need to choose
				93	// based on our triple. On MacOS, we have only a lib directory.
				94	//
				95	// It's sufficient for our purposes to be flexible: If both lib and lib64
				96	// exist, we choose whichever one matches our triple. Otherwise, if only
				97	// lib exists, we use it.
				98	if (HostTriple.isArch64Bit() && FS.exists(InstallPath + "/lib64"))
				99	LibPath = InstallPath + "/lib64";
				100	else if (FS.exists(InstallPath + "/lib"))
				101	LibPath = InstallPath + "/lib";
				102	else
				103	continue;
				104
				105	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
				106	FS.getBufferForFile(InstallPath + "/version.txt");
				107	if (!VersionFile) {
				108	// CUDA 7.0 doesn't have a version.txt, so guess that's our version if
				109	// version.txt isn't present.
				110	Version = CudaVersion::CUDA_70;
				111	} else {
				112	Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
				113	}
				114
				115	std::error_code EC;
				116	for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
				117	!EC && LI != LE; LI = LI.increment(EC)) {
				118	StringRef FilePath = LI->path();
				119	StringRef FileName = llvm::sys::path::filename(FilePath);
				120	// Process all bitcode filenames that look like libdevice.compute_XX.YY.bc
				121	const StringRef LibDeviceName = "libdevice.";
				122	if (!(FileName.startswith(LibDeviceName) && FileName.endswith(".bc")))
				123	continue;
				124	StringRef GpuArch = FileName.slice(
				125	LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
				126	LibDeviceMap[GpuArch] = FilePath.str();
				127	// Insert map entries for specifc devices with this compute
				128	// capability. NVCC's choice of the libdevice library version is
				129	// rather peculiar and depends on the CUDA version.
				130	if (GpuArch == "compute_20") {
				131	LibDeviceMap["sm_20"] = FilePath;
				132	LibDeviceMap["sm_21"] = FilePath;
				133	LibDeviceMap["sm_32"] = FilePath;
				134	} else if (GpuArch == "compute_30") {
				135	LibDeviceMap["sm_30"] = FilePath;
				136	if (Version < CudaVersion::CUDA_80) {
				137	LibDeviceMap["sm_50"] = FilePath;
				138	LibDeviceMap["sm_52"] = FilePath;
				139	LibDeviceMap["sm_53"] = FilePath;
				140	}
				141	LibDeviceMap["sm_60"] = FilePath;
				142	LibDeviceMap["sm_61"] = FilePath;
				143	LibDeviceMap["sm_62"] = FilePath;
				144	} else if (GpuArch == "compute_35") {
				145	LibDeviceMap["sm_35"] = FilePath;
				146	LibDeviceMap["sm_37"] = FilePath;
				147	} else if (GpuArch == "compute_50") {
				148	if (Version >= CudaVersion::CUDA_80) {
				149	LibDeviceMap["sm_50"] = FilePath;
				150	LibDeviceMap["sm_52"] = FilePath;
				151	LibDeviceMap["sm_53"] = FilePath;
				152	}
				153	}
				154	}
				155
				156	IsValid = true;
				157	break;
				158	}
				159	}
				160
				161	void CudaInstallationDetector::AddCudaIncludeArgs(
				162	const ArgList &DriverArgs, ArgStringList &CC1Args) const {
				163	if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
				164	// Add cuda_wrappers/* to our system include path. This lets us wrap
				165	// standard library headers.
				166	SmallString<128> P(D.ResourceDir);
				167	llvm::sys::path::append(P, "include");
				168	llvm::sys::path::append(P, "cuda_wrappers");
				169	CC1Args.push_back("-internal-isystem");
				170	CC1Args.push_back(DriverArgs.MakeArgString(P));
				171	}
				172
				173	if (DriverArgs.hasArg(options::OPT_nocudainc))
				174	return;
				175
				176	if (!isValid()) {
				177	D.Diag(diag::err_drv_no_cuda_installation);
				178	return;
				179	}
				180
				181	CC1Args.push_back("-internal-isystem");
				182	CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
				183	CC1Args.push_back("-include");
				184	CC1Args.push_back("__clang_cuda_runtime_wrapper.h");
				185	}
				186
				187	void CudaInstallationDetector::CheckCudaVersionSupportsArch(
				188	CudaArch Arch) const {
				189	if (Arch == CudaArch::UNKNOWN \|\| Version == CudaVersion::UNKNOWN \|\|
				190	ArchsWithVersionTooLowErrors.count(Arch) > 0)
				191	return;
				192
				193	auto RequiredVersion = MinVersionForCudaArch(Arch);
				194	if (Version < RequiredVersion) {
				195	ArchsWithVersionTooLowErrors.insert(Arch);
				196	D.Diag(diag::err_drv_cuda_version_too_low)
				197	<< InstallPath << CudaArchToString(Arch) << CudaVersionToString(Version)
				198	<< CudaVersionToString(RequiredVersion);
				199	}
				200	}
				201
				202	void CudaInstallationDetector::print(raw_ostream &OS) const {
				203	if (isValid())
				204	OS << "Found CUDA installation: " << InstallPath << ", version "
				205	<< CudaVersionToString(Version) << "\n";
				206	}
				207
				208	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
				209	const InputInfo &Output,
				210	const InputInfoList &Inputs,
				211	const ArgList &Args,
				212	const char *LinkingOutput) const {
				213	const auto &TC =
				214	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				215	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				216
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	217	StringRef GPUArchName;
				218	// If this is an OpenMP action we need to extract the device architecture
				219	// from the -march=arch option. This option may come from -Xopenmp-target
				220	// flag or the default value.
				221	if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
				222	GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
				223	assert(!GPUArchName.empty() && "Must have an architecture passed in.");
				224	} else
				225	GPUArchName = JA.getOffloadingArch();
				226
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	227	// Obtain architecture from the action.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	228	CudaArch gpu_arch = StringToCudaArch(GPUArchName);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	229	assert(gpu_arch != CudaArch::UNKNOWN &&
				230	"Device action expected to have an architecture.");
				231
				232	// Check that our installation's ptxas supports gpu_arch.
				233	if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
				234	TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
				235	}
				236
				237	ArgStringList CmdArgs;
				238	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
				239	if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
				240	options::OPT_no_cuda_noopt_device_debug, false)) {
				241	// ptxas does not accept -g option if optimization is enabled, so
				242	// we ignore the compiler's -O* options if we want debug info.
				243	CmdArgs.push_back("-g");
				244	CmdArgs.push_back("--dont-merge-basicblocks");
				245	CmdArgs.push_back("--return-at-end");
				246	} else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
				247	// Map the -O we received to -O{0,1,2,3}.
				248	//
				249	// TODO: Perhaps we should map host -O2 to ptxas -O3. -O3 is ptxas's
				250	// default, so it may correspond more closely to the spirit of clang -O2.
				251
				252	// -O3 seems like the least-bad option when -Osomething is specified to
				253	// clang but it isn't handled below.
				254	StringRef OOpt = "3";
				255	if (A->getOption().matches(options::OPT_O4) \|\|
				256	A->getOption().matches(options::OPT_Ofast))
				257	OOpt = "3";
				258	else if (A->getOption().matches(options::OPT_O0))
				259	OOpt = "0";
				260	else if (A->getOption().matches(options::OPT_O)) {
				261	// -Os, -Oz, and -O(anything else) map to -O2, for lack of better options.
				262	OOpt = llvm::StringSwitch<const char *>(A->getValue())
				263	.Case("1", "1")
				264	.Case("2", "2")
				265	.Case("3", "3")
				266	.Case("s", "2")
				267	.Case("z", "2")
				268	.Default("2");
				269	}
				270	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-O") + OOpt));
				271	} else {
				272	// If no -O was passed, pass -O0 to ptxas -- no opt flag should correspond
				273	// to no optimizations, but ptxas's default is -O3.
				274	CmdArgs.push_back("-O0");
				275	}
				276
Gheorghe-Teodor Bercea	53431bc	2017-08-07 20:19:23 +0000	[diff] [blame]	277	// Pass -v to ptxas if it was passed to the driver.
				278	if (Args.hasArg(options::OPT_v))
				279	CmdArgs.push_back("-v");
				280
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	281	CmdArgs.push_back("--gpu-name");
				282	CmdArgs.push_back(Args.MakeArgString(CudaArchToString(gpu_arch)));
				283	CmdArgs.push_back("--output-file");
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	284	SmallString<256> OutputFileName(Output.getFilename());
				285	if (JA.isOffloading(Action::OFK_OpenMP))
				286	llvm::sys::path::replace_extension(OutputFileName, "cubin");
				287	CmdArgs.push_back(Args.MakeArgString(OutputFileName));
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	288	for (const auto& II : Inputs)
				289	CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
				290
				291	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
				292	CmdArgs.push_back(Args.MakeArgString(A));
				293
				294	const char *Exec;
				295	if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
				296	Exec = A->getValue();
				297	else
				298	Exec = Args.MakeArgString(TC.GetProgramPath("ptxas"));
				299	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				300	}
				301
				302	// All inputs to this linker must be from CudaDeviceActions, as we need to look
				303	// at the Inputs' Actions in order to figure out which GPU architecture they
				304	// correspond to.
				305	void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
				306	const InputInfo &Output,
				307	const InputInfoList &Inputs,
				308	const ArgList &Args,
				309	const char *LinkingOutput) const {
				310	const auto &TC =
				311	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				312	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				313
				314	ArgStringList CmdArgs;
				315	CmdArgs.push_back("--cuda");
				316	CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
				317	CmdArgs.push_back(Args.MakeArgString("--create"));
				318	CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
				319
				320	for (const auto& II : Inputs) {
				321	auto *A = II.getAction();
				322	assert(A->getInputs().size() == 1 &&
				323	"Device offload action is expected to have a single input");
				324	const char *gpu_arch_str = A->getOffloadingArch();
				325	assert(gpu_arch_str &&
				326	"Device action expected to have associated a GPU architecture!");
				327	CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
				328
				329	// We need to pass an Arch of the form "sm_XX" for cubin files and
				330	// "compute_XX" for ptx.
				331	const char *Arch =
				332	(II.getType() == types::TY_PP_Asm)
				333	? CudaVirtualArchToString(VirtualArchForCudaArch(gpu_arch))
				334	: gpu_arch_str;
				335	CmdArgs.push_back(Args.MakeArgString(llvm::Twine("--image=profile=") +
				336	Arch + ",file=" + II.getFilename()));
				337	}
				338
				339	for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
				340	CmdArgs.push_back(Args.MakeArgString(A));
				341
				342	const char *Exec = Args.MakeArgString(TC.GetProgramPath("fatbinary"));
				343	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				344	}
				345
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	346	void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
				347	const InputInfo &Output,
				348	const InputInfoList &Inputs,
				349	const ArgList &Args,
				350	const char *LinkingOutput) const {
				351	const auto &TC =
				352	static_cast<const toolchains::CudaToolChain &>(getToolChain());
				353	assert(TC.getTriple().isNVPTX() && "Wrong platform");
				354
				355	ArgStringList CmdArgs;
				356
				357	// OpenMP uses nvlink to link cubin files. The result will be embedded in the
				358	// host binary by the host linker.
				359	assert(!JA.isHostOffloading(Action::OFK_OpenMP) &&
				360	"CUDA toolchain not expected for an OpenMP host device.");
				361
				362	if (Output.isFilename()) {
				363	CmdArgs.push_back("-o");
				364	CmdArgs.push_back(Output.getFilename());
				365	} else
				366	assert(Output.isNothing() && "Invalid output.");
				367	if (Args.hasArg(options::OPT_g_Flag))
				368	CmdArgs.push_back("-g");
				369
				370	if (Args.hasArg(options::OPT_v))
				371	CmdArgs.push_back("-v");
				372
				373	StringRef GPUArch =
				374	Args.getLastArgValue(options::OPT_march_EQ);
				375	assert(!GPUArch.empty() && "At least one GPU Arch required for ptxas.");
				376
				377	CmdArgs.push_back("-arch");
				378	CmdArgs.push_back(Args.MakeArgString(GPUArch));
				379
				380	// Add paths specified in LIBRARY_PATH environment variable as -L options.
				381	addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
				382
				383	// Add paths for the default clang library path.
				384	SmallString<256> DefaultLibPath =
				385	llvm::sys::path::parent_path(TC.getDriver().Dir);
				386	llvm::sys::path::append(DefaultLibPath, "lib" CLANG_LIBDIR_SUFFIX);
				387	CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
				388
				389	// Add linking against library implementing OpenMP calls on NVPTX target.
				390	CmdArgs.push_back("-lomptarget-nvptx");
				391
				392	for (const auto &II : Inputs) {
				393	if (II.getType() == types::TY_LLVM_IR \|\|
				394	II.getType() == types::TY_LTO_IR \|\|
				395	II.getType() == types::TY_LTO_BC \|\|
				396	II.getType() == types::TY_LLVM_BC) {
				397	C.getDriver().Diag(diag::err_drv_no_linker_llvm_support)
				398	<< getToolChain().getTripleString();
				399	continue;
				400	}
				401
				402	// Currently, we only pass the input files to the linker, we do not pass
				403	// any libraries that may be valid only for the host.
				404	if (!II.isFilename())
				405	continue;
				406
				407	SmallString<256> Name = llvm::sys::path::filename(II.getFilename());
				408	llvm::sys::path::replace_extension(Name, "cubin");
				409
				410	const char *CubinF =
				411	C.addTempFile(C.getArgs().MakeArgString(Name));
				412
				413	CmdArgs.push_back(CubinF);
				414	}
				415
				416	AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
				417
				418	const char *Exec =
				419	Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
				420	C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
				421	}
				422
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	423	/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
				424	/// which isn't properly a linker but nonetheless performs the step of stitching
				425	/// together object files from the assembler into a single blob.
				426
				427	CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	428	const ToolChain &HostTC, const ArgList &Args,
				429	const Action::OffloadKind OK)
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	430	: ToolChain(D, Triple, Args), HostTC(HostTC),
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	431	CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	432	if (CudaInstallation.isValid())
				433	getProgramPaths().push_back(CudaInstallation.getBinPath());
				434	}
				435
				436	void CudaToolChain::addClangTargetOptions(
				437	const llvm::opt::ArgList &DriverArgs,
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	438	llvm::opt::ArgStringList &CC1Args,
				439	Action::OffloadKind DeviceOffloadingKind) const {
				440	HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	441
				442	StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				443	assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	444	assert((DeviceOffloadingKind == Action::OFK_OpenMP \|\|
				445	DeviceOffloadingKind == Action::OFK_Cuda) &&
				446	"Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
				447
				448	if (DeviceOffloadingKind == Action::OFK_Cuda) {
				449	CC1Args.push_back("-fcuda-is-device");
				450
				451	if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
				452	options::OPT_fno_cuda_flush_denormals_to_zero, false))
				453	CC1Args.push_back("-fcuda-flush-denormals-to-zero");
				454
				455	if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
				456	options::OPT_fno_cuda_approx_transcendentals, false))
				457	CC1Args.push_back("-fcuda-approx-transcendentals");
				458
				459	if (DriverArgs.hasArg(options::OPT_nocudalib))
				460	return;
				461	}
				462
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	463	std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
				464
				465	if (LibDeviceFile.empty()) {
				466	getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
				467	return;
				468	}
				469
				470	CC1Args.push_back("-mlink-cuda-bitcode");
				471	CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
				472
				473	// Libdevice in CUDA-7.0 requires PTX version that's more recent
				474	// than LLVM defaults to. Use PTX4.2 which is the PTX version that
				475	// came with CUDA-7.0.
				476	CC1Args.push_back("-target-feature");
				477	CC1Args.push_back("+ptx42");
				478	}
				479
				480	void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
				481	ArgStringList &CC1Args) const {
				482	// Check our CUDA version if we're going to include the CUDA headers.
				483	if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
				484	!DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
				485	StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
				486	assert(!Arch.empty() && "Must have an explicit GPU arch.");
				487	CudaInstallation.CheckCudaVersionSupportsArch(StringToCudaArch(Arch));
				488	}
				489	CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
				490	}
				491
				492	llvm::opt::DerivedArgList *
				493	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
				494	StringRef BoundArch,
				495	Action::OffloadKind DeviceOffloadKind) const {
				496	DerivedArgList *DAL =
				497	HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
				498	if (!DAL)
				499	DAL = new DerivedArgList(Args.getBaseArgs());
				500
				501	const OptTable &Opts = getDriver().getOpts();
				502
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	503	// For OpenMP device offloading, append derived arguments. Make sure
				504	// flags are not duplicated.
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	505	// Also append the compute capability.
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	506	if (DeviceOffloadKind == Action::OFK_OpenMP) {
				507	for (Arg *A : Args){
				508	bool IsDuplicate = false;
				509	for (Arg DALArg : DAL){
				510	if (A == DALArg) {
				511	IsDuplicate = true;
				512	break;
				513	}
				514	}
				515	if (!IsDuplicate)
				516	DAL->append(A);
				517	}
Gheorghe-Teodor Bercea	47e0cf3	2017-08-07 15:39:11 +0000	[diff] [blame]	518
				519	StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
				520	if (Arch.empty())
				521	// Default compute capability for CUDA toolchain is sm_20.
				522	DAL->AddJoinedArg(nullptr,
				523	Opts.getOption(options::OPT_march_EQ), "sm_20");
				524
Gheorghe-Teodor Bercea	f0f2960	2017-07-06 16:22:21 +0000	[diff] [blame]	525	return DAL;
				526	}
				527
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	528	for (Arg *A : Args) {
				529	if (A->getOption().matches(options::OPT_Xarch__)) {
				530	// Skip this argument unless the architecture matches BoundArch
				531	if (BoundArch.empty() \|\| A->getValue(0) != BoundArch)
				532	continue;
				533
				534	unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
				535	unsigned Prev = Index;
				536	std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
				537
				538	// If the argument parsing failed or more than one argument was
				539	// consumed, the -Xarch_ argument's parameter tried to consume
				540	// extra arguments. Emit an error and ignore.
				541	//
				542	// We also want to disallow any options which would alter the
				543	// driver behavior; that isn't going to work in our model. We
				544	// use isDriverOption() as an approximation, although things
				545	// like -O4 are going to slip through.
				546	if (!XarchArg \|\| Index > Prev + 1) {
				547	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
				548	<< A->getAsString(Args);
				549	continue;
				550	} else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
				551	getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
				552	<< A->getAsString(Args);
				553	continue;
				554	}
				555	XarchArg->setBaseArg(A);
				556	A = XarchArg.release();
				557	DAL->AddSynthesizedArg(A);
				558	}
				559	DAL->append(A);
				560	}
				561
				562	if (!BoundArch.empty()) {
				563	DAL->eraseArg(options::OPT_march_EQ);
				564	DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
				565	}
				566	return DAL;
				567	}
				568
				569	Tool *CudaToolChain::buildAssembler() const {
				570	return new tools::NVPTX::Assembler(*this);
				571	}
				572
				573	Tool *CudaToolChain::buildLinker() const {
Gheorghe-Teodor Bercea	2c92693	2017-08-08 14:33:05 +0000	[diff] [blame^]	574	if (OK == Action::OFK_OpenMP)
				575	return new tools::NVPTX::OpenMPLinker(*this);
David L. Jones	f561aba	2017-03-08 01:02:16 +0000	[diff] [blame]	576	return new tools::NVPTX::Linker(*this);
				577	}
				578
				579	void CudaToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
				580	HostTC.addClangWarningOptions(CC1Args);
				581	}
				582
				583	ToolChain::CXXStdlibType
				584	CudaToolChain::GetCXXStdlibType(const ArgList &Args) const {
				585	return HostTC.GetCXXStdlibType(Args);
				586	}
				587
				588	void CudaToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
				589	ArgStringList &CC1Args) const {
				590	HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
				591	}
				592
				593	void CudaToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
				594	ArgStringList &CC1Args) const {
				595	HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
				596	}
				597
				598	void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
				599	ArgStringList &CC1Args) const {
				600	HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
				601	}
				602
				603	SanitizerMask CudaToolChain::getSupportedSanitizers() const {
				604	// The CudaToolChain only supports sanitizers in the sense that it allows
				605	// sanitizer arguments on the command line if they are supported by the host
				606	// toolchain. The CudaToolChain will actually ignore any command line
				607	// arguments for any of these "supported" sanitizers. That means that no
				608	// sanitization of device code is actually supported at this time.
				609	//
				610	// This behavior is necessary because the host and device toolchains
				611	// invocations often share the command line, so the device toolchain must
				612	// tolerate flags meant only for the host toolchain.
				613	return HostTC.getSupportedSanitizers();
				614	}
				615
				616	VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D,
				617	const ArgList &Args) const {
				618	return HostTC.computeMSVCVersion(D, Args);
				619	}