[cuda] Driver changes to compile and stitch together host and device-side CUDA code.
NOTE: reverts r242077 to reinstate r242058, r242065, 242067
and includes fix for OS X test failures.
- Changed driver pipeline to compile host and device side of CUDA
files and incorporate results of device-side compilation into host
object file.
- Added a test for cuda pipeline creation in clang driver.
New clang options:
--cuda-host-only - Do host-side compilation only.
--cuda-device-only - Do device-side compilation only.
--cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side
compilation. E.g. sm_35, sm_30. Default is sm_20. May be used more
than once in which case one device-compilation will be done per
unique specified GPU architecture.
Differential Revision: http://reviews.llvm.org/D9509
llvm-svn: 242085
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index bf9b4ba..c6dc178 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp
@@ -1488,6 +1488,12 @@
return CPUName;
}
+ case llvm::Triple::nvptx:
+ case llvm::Triple::nvptx64:
+ if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
+ return A->getValue();
+ return "";
+
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le: {
@@ -2826,8 +2832,14 @@
getToolChain().getTriple().isWindowsCygwinEnvironment();
bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
- assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
+ // Check number of inputs for sanity. We need at least one input.
+ assert(Inputs.size() >= 1 && "Must have at least one input.");
const InputInfo &Input = Inputs[0];
+ // CUDA compilation may have multiple inputs (source file + results of
+ // device-side compilations). All other jobs are expected to have exactly one
+ // input.
+ bool IsCuda = types::isCuda(Input.getType());
+ assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs.");
// Invoke ourselves in -cc1 mode.
//
@@ -4812,14 +4824,12 @@
assert(Output.isNothing() && "Invalid output.");
}
- for (const auto &II : Inputs) {
- addDashXForInput(Args, II, CmdArgs);
+ addDashXForInput(Args, Input, CmdArgs);
- if (II.isFilename())
- CmdArgs.push_back(II.getFilename());
- else
- II.getInputArg().renderAsInput(Args, CmdArgs);
- }
+ if (Input.isFilename())
+ CmdArgs.push_back(Input.getFilename());
+ else
+ Input.getInputArg().renderAsInput(Args, CmdArgs);
Args.AddAllArgs(CmdArgs, options::OPT_undef);
@@ -4857,6 +4867,16 @@
CmdArgs.push_back(SplitDwarfOut);
}
+ // Host-side cuda compilation receives device-side outputs as Inputs[1...].
+ // Include them with -fcuda-include-gpubinary.
+ if (IsCuda && Inputs.size() > 1)
+ for (InputInfoList::const_iterator it = std::next(Inputs.begin()),
+ ie = Inputs.end();
+ it != ie; ++it) {
+ CmdArgs.push_back("-fcuda-include-gpubinary");
+ CmdArgs.push_back(it->getFilename());
+ }
+
// Finally add the compile command to the compilation.
if (Args.hasArg(options::OPT__SLASH_fallback) &&
Output.getType() == types::TY_Object &&