[cuda] Driver changes to compile and stitch together host and device-side CUDA code. NOTE: reverts r242077 to reinstate r242058, r242065, 242067 and includes fix for OS X test failures. - Changed driver pipeline to compile host and device side of CUDA files and incorporate results of device-side compilation into host object file. - Added a test for cuda pipeline creation in clang driver. New clang options: --cuda-host-only - Do host-side compilation only. --cuda-device-only - Do device-side compilation only. --cuda-gpu-arch=<ARCH> - specify GPU architecture for device-side compilation. E.g. sm_35, sm_30. Default is sm_20. May be used more than once in which case one device-compilation will be done per unique specified GPU architecture. Differential Revision: http://reviews.llvm.org/D9509 llvm-svn: 242085

commit: 0ff05cd1651474678e8f503cae4956f0c342bf67 [log] [tgz]
author: Artem Belevich <tra@google.com> Mon Jul 13 23:27:56 2015 +0000
committer: Artem Belevich <tra@google.com> Mon Jul 13 23:27:56 2015 +0000
tree: 98c608f18a4fd854516da066943e603c71280988
parent: 2eacca86ef2c255bae8eff43056dfa6c57ae7092 [diff] [blame]
diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp
index bf9b4ba..c6dc178 100644
--- a/clang/lib/Driver/Tools.cpp
+++ b/clang/lib/Driver/Tools.cpp

@@ -1488,6 +1488,12 @@
     return CPUName;
   }
 
+  case llvm::Triple::nvptx:
+  case llvm::Triple::nvptx64:
+    if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
+      return A->getValue();
+    return "";
+
   case llvm::Triple::ppc:
   case llvm::Triple::ppc64:
   case llvm::Triple::ppc64le: {
@@ -2826,8 +2832,14 @@
       getToolChain().getTriple().isWindowsCygwinEnvironment();
   bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment();
 
-  assert(Inputs.size() == 1 && "Unable to handle multiple inputs.");
+  // Check number of inputs for sanity. We need at least one input.
+  assert(Inputs.size() >= 1 && "Must have at least one input.");
   const InputInfo &Input = Inputs[0];
+  // CUDA compilation may have multiple inputs (source file + results of
+  // device-side compilations). All other jobs are expected to have exactly one
+  // input.
+  bool IsCuda = types::isCuda(Input.getType());
+  assert((IsCuda || Inputs.size() == 1) && "Unable to handle multiple inputs.");
 
   // Invoke ourselves in -cc1 mode.
   //
@@ -4812,14 +4824,12 @@
     assert(Output.isNothing() && "Invalid output.");
   }
 
-  for (const auto &II : Inputs) {
-    addDashXForInput(Args, II, CmdArgs);
+  addDashXForInput(Args, Input, CmdArgs);
 
-    if (II.isFilename())
-      CmdArgs.push_back(II.getFilename());
-    else
-      II.getInputArg().renderAsInput(Args, CmdArgs);
-  }
+  if (Input.isFilename())
+    CmdArgs.push_back(Input.getFilename());
+  else
+    Input.getInputArg().renderAsInput(Args, CmdArgs);
 
   Args.AddAllArgs(CmdArgs, options::OPT_undef);
 
@@ -4857,6 +4867,16 @@
     CmdArgs.push_back(SplitDwarfOut);
   }
 
+  // Host-side cuda compilation receives device-side outputs as Inputs[1...].
+  // Include them with -fcuda-include-gpubinary.
+  if (IsCuda && Inputs.size() > 1)
+    for (InputInfoList::const_iterator it = std::next(Inputs.begin()),
+                                       ie = Inputs.end();
+         it != ie; ++it) {
+      CmdArgs.push_back("-fcuda-include-gpubinary");
+      CmdArgs.push_back(it->getFilename());
+    }
+
   // Finally add the compile command to the compilation.
   if (Args.hasArg(options::OPT__SLASH_fallback) &&
       Output.getType() == types::TY_Object &&
commit	0ff05cd1651474678e8f503cae4956f0c342bf67	[log] [tgz]
author	Artem Belevich <tra@google.com>	Mon Jul 13 23:27:56 2015 +0000
committer	Artem Belevich <tra@google.com>	Mon Jul 13 23:27:56 2015 +0000
tree	98c608f18a4fd854516da066943e603c71280988
parent	2eacca86ef2c255bae8eff43056dfa6c57ae7092 [diff] [blame]