[CUDA] Fix false-positive in known-emitted handling. Previously, when compiling for host, our constructed call graph went *through* kernel calls. This meant that if we had "host calls kernel calls HD", we would incorrectly mark the HD function as known-emitted on the host side, and thus perform host-side checks on it. Fixing this exposed another issue: when marking a function as known-emitted, we also need to traverse the call graph of its template, because non-dependent calls are attached to a function's template, not its instantiation. llvm-svn: 284355

commit: d692dfb65e92095d7bdab32543cb967f08fe7745 [log] [tgz]
author: Justin Lebar <jlebar@google.com> Mon Oct 17 02:25:55 2016 +0000
committer: Justin Lebar <jlebar@google.com> Mon Oct 17 02:25:55 2016 +0000
tree: a37acc3f90913969d750ef096c8773ed3603d5b9
parent: 715ad7fef5d42a2ab001a2528027a3aa741f5b7c [diff] [blame]
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 18751d4..75ec5f2 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp

@@ -644,10 +644,16 @@
     S.CUDAKnownEmittedFns.insert(Caller);
     EmitDeferredDiags(S, Caller);
 
-    // Deferred diags are often emitted on the template itself, so emit those as
-    // well.
-    if (auto *Templ = Caller->getPrimaryTemplate())
-      EmitDeferredDiags(S, Templ->getAsFunction());
+    // If this is a template instantiation, explore its callgraph as well:
+    // Non-dependent calls are part of the template's callgraph, while dependent
+    // calls are part of the instantiation's call graph.
+    if (auto *Templ = Caller->getPrimaryTemplate()) {
+      FunctionDecl *TemplFD = Templ->getAsFunction();
+      if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
+        Seen.insert(TemplFD);
+        Worklist.push_back(TemplFD);
+      }
+    }
 
     // Add all functions called by Caller to our worklist.
     auto CGIt = S.CUDACallGraph.find(Caller);
@@ -676,11 +682,21 @@
   if (!Caller)
     return true;
 
+  // If the caller is known-emitted, mark the callee as known-emitted.
+  // Otherwise, mark the call in our call graph so we can traverse it later.
   bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
   if (CallerKnownEmitted)
     MarkKnownEmitted(*this, Callee);
-  else
-    CUDACallGraph[Caller].insert(Callee);
+  else {
+    // If we have
+    //   host fn calls kernel fn calls host+device,
+    // the HD function does not get instantiated on the host.  We model this by
+    // omitting the call to the kernel from the callgraph.  This ensures
+    // that, when compiling for host, only HD functions actually called from the
+    // host get marked as known-emitted.
+    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+      CUDACallGraph[Caller].insert(Callee);
+  }
 
   CUDADiagBuilder::Kind DiagKind = [&] {
     switch (IdentifyCUDAPreference(Caller, Callee)) {
commit	d692dfb65e92095d7bdab32543cb967f08fe7745	[log] [tgz]
author	Justin Lebar <jlebar@google.com>	Mon Oct 17 02:25:55 2016 +0000
committer	Justin Lebar <jlebar@google.com>	Mon Oct 17 02:25:55 2016 +0000
tree	a37acc3f90913969d750ef096c8773ed3603d5b9
parent	715ad7fef5d42a2ab001a2528027a3aa741f5b7c [diff] [blame]