[OPENMP][NVPTX]Use faster teams reduction algorithm.
A faster way to reduce the values in teams reductions was found; the
codegen is updated to use this faster algorithm and the new runtime functions.
llvm-svn: 354479
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index d4094ac..1a33a00 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2840,6 +2840,9 @@
Opts.OpenMPCUDABlocksPerSM =
getLastArgIntValue(Args, options::OPT_fopenmp_cuda_blocks_per_sm_EQ,
Opts.OpenMPCUDABlocksPerSM, Diags);
+ Opts.OpenMPCUDAReductionBufNum = getLastArgIntValue(
+ Args, options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ,
+ Opts.OpenMPCUDAReductionBufNum, Diags);
}
// Prevent auto-widening the representation of loop counters during an