[OPENMP][NVPTX]Mark more functions as always_inline for better
performance.
Internally generated functions must be marked as always_inlines in most
cases. Patch marks some extra reduction function + outlined parallel
functions as always_inline for better performance, but only if the
optimization is requested.
llvm-svn: 361269
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c3f60d7..eb21bbd 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1274,9 +1274,11 @@
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
Name, &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
- Fn->removeFnAttr(llvm::Attribute::NoInline);
- Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
- Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ if (CGM.getLangOpts().Optimize) {
+ Fn->removeFnAttr(llvm::Attribute::NoInline);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
+ Fn->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
// Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
@@ -4671,9 +4673,11 @@
&CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
TaskPrivatesMapFnInfo);
- TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
- TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
- TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
+ if (CGM.getLangOpts().Optimize) {
+ TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
+ TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
+ TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
TaskPrivatesMapFnInfo, Args, Loc, Loc);