[OPENMP][NVPTX]Mark more functions as always_inline for better
performance.
Internally generated functions must be marked as always_inline in most
cases. This patch marks some extra reduction functions and outlined
parallel functions as always_inline for better performance, but only if
optimization is requested.
llvm-svn: 361269
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index ca1e931..e6f9d97 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1929,6 +1929,11 @@
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
+ if (CGM.getLangOpts().Optimize) {
+ OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+ OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
IsInTTDRegion = PrevIsInTTDRegion;
if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
@@ -2045,9 +2050,11 @@
CodeGen.setAction(Action);
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
- OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
- OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
- OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ if (CGM.getLangOpts().Optimize) {
+ OutlinedFun->removeFnAttr(llvm::Attribute::NoInline);
+ OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
return OutlinedFun;
}
@@ -3422,6 +3429,12 @@
"_omp_reduction_shuffle_and_reduce_func", &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setDoesNotRecurse();
+ if (CGM.getLangOpts().Optimize) {
+ Fn->removeFnAttr(llvm::Attribute::NoInline);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);