[OPENMP50] Codegen for reduction clauses with 'task' modifier.
Summary:
Added codegen for reduction clause with task modifier.
```
#pragma omp ... reduction(task, +: a)
{
#pragma omp ... in_reduction(+: a)
}
```
is translated into something like this:
```
#pragma omp ... reduction(+:a)
{
struct red_input_t {
void *reduce_shar;
void *reduce_orig;
size_t reduce_size;
void *reduce_init;
void *reduce_fini;
void *reduce_comb;
unsigned flags;
} r_var;
r_var.reduce_shar = &a;
r_var.reduce_orig = &original a;
r_var.reduce_size = sizeof(a);
r_var.reduce_init = [](void* l,void*){return *(int*)l=0;};
r_var.reduce_fini = nullptr;
r_var.reduce_comb = [](void* l,void* r){return *(int*)l += *(int*)r;};
void *tg = __kmpc_taskred_modifier_init(<loc_addr>,<gtid>,
<flag - 0 for parallel, 1 for worksharing>,
<1 - number of reduction elements>,
&r_var);
{
#pragma omp ... in_reduction(+: a) firstprivate(tg)
...
}
__kmpc_task_reduction_modifier_fini(<loc_addr>,<gtid>,
<flag - 0 for parallel, 1 for worksharing>);
}
```
Reviewers: jdoerfert
Subscribers: yaxunl, guansong, jfb, cfe-commits, caomhin
Tags: #clang
Differential Revision: https://reviews.llvm.org/D79034
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 19bbcb8..02075be 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1169,21 +1169,23 @@
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
+ OMPTaskDataTy Data;
+ SmallVector<const Expr *, 4> TaskLHSs;
+ SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Shareds.emplace_back(Ref);
- Privates.emplace_back(*IPriv);
- ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ if (C->getModifier() == OMPC_REDUCTION_task) {
+ Data.ReductionVars.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
@@ -1261,6 +1263,117 @@
++IPriv;
++Count;
}
+ if (!Data.ReductionVars.empty()) {
+ Data.IsReductionWithTaskMod = true;
+ Data.IsWorksharingReduction =
+ isOpenMPWorksharingDirective(D.getDirectiveKind());
+ llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
+ const Expr *TaskRedRef = nullptr;
+ switch (D.getDirectiveKind()) {
+ case OMPD_parallel:
+ TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_for:
+ TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_sections:
+ TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_for:
+ TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_master:
+ TaskRedRef =
+ cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_sections:
+ TaskRedRef =
+ cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel:
+ TaskRedRef =
+ cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel_for:
+ TaskRedRef =
+ cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_distribute_parallel_for:
+ TaskRedRef =
+ cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_target_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_simd:
+ case OMPD_for_simd:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_parallel_for_simd:
+ case OMPD_task:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
+ case OMPD_ordered:
+ case OMPD_atomic:
+ case OMPD_teams:
+ case OMPD_target:
+ case OMPD_cancellation_point:
+ case OMPD_cancel:
+ case OMPD_target_data:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
+ case OMPD_distribute:
+ case OMPD_target_update:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_declare_simd:
+ case OMPD_requires:
+ case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
+ case OMPD_unknown:
+ llvm_unreachable("Enexpected directive with task reductions.");
+ }
+
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
+ EmitVarDecl(*VD);
+ EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
+ /*Volatile=*/false, TaskRedRef->getType());
+ }
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
@@ -1272,14 +1385,22 @@
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
+ bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ IsReductionWithTaskMod =
+ IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
+ if (IsReductionWithTaskMod) {
+ CGM.getOpenMPRuntime().emitTaskReductionFini(
+ *this, D.getBeginLoc(),
+ isOpenMPWorksharingDirective(D.getDirectiveKind()));
+ }
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
@@ -3382,21 +3503,13 @@
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, S.getBeginLoc(), LHSs, RHSs, Data);
@@ -3776,21 +3889,13 @@
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
llvm::Value *ReductionDesc =
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),