[OPENMP50]Codegen for scan directives in parallel for regions.
Summary:
Added codegen for scan directives in parallel for regions.
Emits the code for the directive with inscan reductions.
Original code:
```
#pragma omp parallel for reduction(inscan, op : ...)
for() {
<input phase>;
#pragma omp scan (in)exclusive(...)
<scan phase>
}
```
is transformed to something:
```
#pragma omp parallel
{
size num_iters = <num_iters>;
<type> buffer[num_iters];
#pragma omp for
for (i: 0..<num_iters>) {
<input phase>;
buffer[i] = red;
}
#pragma omp barrier
for (int k = 0; k != ceil(log2(num_iters)); ++k)
for (size cnt = last_iter; cnt >= pow(2, k); --k)
buffer[i] op= buffer[i-pow(2,k)];
#pragma omp for
for (0..<num_iters>) {
red = InclusiveScan ? buffer[i] : buffer[i-1];
<scan phase>;
}
}
```
Reviewers: jdoerfert
Subscribers: yaxunl, guansong, sstefan1, cfe-commits, caomhin
Tags: #clang
Differential Revision: https://reviews.llvm.org/D81478
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index e17bc18..a91ca3d 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3203,41 +3203,53 @@
SecondGen(CGF);
}
+static bool emitWorksharingDirective(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ bool HasCancel) {
+ bool HasLastprivates;
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(
+ CGF, S.getDirectiveKind(), HasCancel);
+ (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ // Emit an implicit barrier at the end.
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
+ OMPD_for);
+ };
+ const auto &&SecondGen = [&S, HasCancel,
+ &HasLastprivates](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(
+ CGF, S.getDirectiveKind(), HasCancel);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ };
+ emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+ } else {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
+ HasCancel);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ }
+ return HasLastprivates;
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
- [](const OMPReductionClause *C) {
- return C->getModifier() == OMPC_REDUCTION_inscan;
- })) {
- const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
- OMPLocalDeclMapRAII Scope(CGF);
- OMPLoopScope LoopScope(CGF, S);
- return CGF.EmitScalarExpr(S.getNumIterations());
- };
- const auto &&FirstGen = [&S](CodeGenFunction &CGF) {
- OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
- // Emit an implicit barrier at the end.
- CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
- OMPD_for);
- };
- const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
- OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
- };
- emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
- } else {
- OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
- }
+ HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
auto LPCRegion =
@@ -3258,34 +3270,7 @@
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
- [](const OMPReductionClause *C) {
- return C->getModifier() == OMPC_REDUCTION_inscan;
- })) {
- const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
- OMPLocalDeclMapRAII Scope(CGF);
- OMPLoopScope LoopScope(CGF, S);
- return CGF.EmitScalarExpr(S.getNumIterations());
- };
- const auto &&FirstGen = [&S](CodeGenFunction &CGF) {
- (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
- // Emit an implicit barrier at the end.
- CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
- OMPD_for);
- };
- const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
- };
- emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
- } else {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
- }
+ HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
auto LPCRegion =
@@ -3621,9 +3606,7 @@
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ (void)emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
auto LPCRegion =