[OPENMP] Codegen for 'lastprivate' clause in 'sections' directive.
#pragma omp sections lastprivate(<var>)
<BODY>;
This construct is translated into something like:
<last_iter> = alloca i32
<init for lastprivates>;
<last_iter> = 0
; Simple variables are left uninitialized; for objects, the default constructor is called.
; Arrays are initialized element by element by calling the default constructor.
...
OMP_FOR_START(..., <last_iter>, ...) ; sets <last_iter> to 1 if this is the last iteration.
<BODY>
...
OMP_FOR_END
if (<last_iter> != 0) {
<final copy for lastprivate>; Update original variable with the lastprivate value.
}
call __kmpc_cancel_barrier() ; an implicit barrier to avoid possible data races.
If there is only one section, no special code is generated: the original shared variables are used directly, and a barrier is emitted at the end of the directive.
Differential Revision: http://reviews.llvm.org/D9240
llvm-svn: 235834
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0b54b1b..62fe4bc 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1112,7 +1112,8 @@
auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
auto *CS = dyn_cast<CompoundStmt>(Stmt);
if (CS && CS->size() > 1) {
- auto &&CodeGen = [&S, CS](CodeGenFunction &CGF) {
+ bool HasLastprivates = false;
+ auto &&CodeGen = [&S, CS, &HasLastprivates](CodeGenFunction &CGF) {
auto &C = CGF.CGM.getContext();
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Emit helper vars inits.
@@ -1173,6 +1174,7 @@
OMPD_unknown);
}
CGF.EmitOMPPrivateClause(S, LoopScope);
+ HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
// Emit static non-chunked loop.
@@ -1192,14 +1194,38 @@
[](CodeGenFunction &) {});
// Tell the runtime we are done.
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocStart());
+
+ // Emit final copy of the lastprivate variables if IsLastIter != 0.
+ if (HasLastprivates)
+ CGF.EmitOMPLastprivateClauseFinal(
+ S, CGF.Builder.CreateIsNotNull(
+ CGF.EmitLoadOfScalar(IL, S.getLocStart())));
};
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, CodeGen);
+ // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
+ // clause. Otherwise the barrier will be generated by the codegen for the
+ // directive.
+ if (HasLastprivates && S.getSingleClause(OMPC_nowait)) {
+ // Emit implicit barrier to synchronize threads and avoid data races on
+ // the final update of the original lastprivate variables.
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
+ OMPD_unknown);
+ }
return OMPD_sections;
}
// If only one section is found - no need to generate loop, emit as a single
// region.
bool HasFirstprivates;
+ // No need to generate lastprivates for sections with single section region,
+ // we can use original shared variable for all calculations with barrier at
+ // the end of the sections.
+ auto LastprivateFilter = [](const OMPClause *C) -> bool {
+ return C->getClauseKind() == OMPC_lastprivate;
+ };
+ OMPExecutableDirective::filtered_clause_iterator<decltype(LastprivateFilter)>
+ I(S.clauses(), LastprivateFilter);
+ bool HasLastprivates = I;
auto &&CodeGen = [Stmt, &S, &HasFirstprivates](CodeGenFunction &CGF) {
CodeGenFunction::OMPPrivateScope SingleScope(CGF);
HasFirstprivates = CGF.EmitOMPFirstprivateClause(S, SingleScope);
@@ -1212,10 +1238,10 @@
CGF.CGM.getOpenMPRuntime().emitSingleRegion(CGF, CodeGen, S.getLocStart(),
llvm::None, llvm::None,
llvm::None, llvm::None);
- // Emit barrier for firstprivates only if 'sections' directive has 'nowait'
- // clause. Otherwise the barrier will be generated by the codegen for the
- // directive.
- if (HasFirstprivates && S.getSingleClause(OMPC_nowait)) {
+ // Emit barrier for firstprivates or lastprivates only if 'sections' directive
+ // has 'nowait' clause. Otherwise the barrier will be generated by the codegen
+ // for the directive.
+ if ((HasFirstprivates || HasLastprivates) && S.getSingleClause(OMPC_nowait)) {
// Emit implicit barrier to synchronize threads and avoid data races on
// initialization of firstprivate variables.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),