[CUDA] Conservatively mark inline asm as convergent. Summary: This is particularly important because some convergent CUDA intrinsics (e.g. __shfl_down) are implemented in terms of inline asm. Reviewers: tra Subscribers: cfe-commits Differential Revision: http://reviews.llvm.org/D20836 llvm-svn: 271336

commit: f179364341ef49440757a481b377627d17db3aaa [log] [tgz]
author: Justin Lebar <jlebar@google.com> Tue May 31 21:27:13 2016 +0000
committer: Justin Lebar <jlebar@google.com> Tue May 31 21:27:13 2016 +0000
tree: 91f6ce66a817e125e164c2fffc16b171150cb443
parent: 0aeb313e792649aa063e91783e3d8675cfdfd5be [diff] [blame]
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6945ec9..ff70bbc 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp

@@ -2054,6 +2054,14 @@
                                           llvm::ConstantAsMetadata::get(Loc)));
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all inline asm blocks in CUDA as convergent
+    // (meaning, they may call an intrinsically convergent op, such as bar.sync,
+    // and so can't have certain optimizations applied around them).
+    Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+                         llvm::Attribute::Convergent);
+  }
+
   // Extract all of the register value results from the asm.
   std::vector<llvm::Value*> RegResults;
   if (ResultRegTypes.size() == 1) {
commit	f179364341ef49440757a481b377627d17db3aaa	[log] [tgz]
author	Justin Lebar <jlebar@google.com>	Tue May 31 21:27:13 2016 +0000
committer	Justin Lebar <jlebar@google.com>	Tue May 31 21:27:13 2016 +0000
tree	91f6ce66a817e125e164c2fffc16b171150cb443
parent	0aeb313e792649aa063e91783e3d8675cfdfd5be [diff] [blame]