[OPENMP50]Codegen for inscan reductions in worksharing directives.

Summary:
Implemented codegen for reduction clauses with inscan modifiers in
worksharing constructs.

Emits the code for the directive with inscan reductions.
The code is the following:
```
size num_iters = <num_iters>;
<type> buffer[num_iters];
for (i: 0..<num_iters>) {
  <input phase>;
  buffer[i] = red;
}
for (int k = 0; k != ceil(log2(num_iters)); ++k)
for (size cnt = last_iter; cnt >= pow(2, k); --k)
  buffer[i] op= buffer[i-pow(2,k)];
for (0..<num_iters>) {
  red = InclusiveScan ? buffer[i] : buffer[i-1];
  <scan phase>;
}
```

Reviewers: jdoerfert

Subscribers: yaxunl, guansong, arphaman, cfe-commits, caomhin

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79948
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index cccb15a..cf498c6 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -253,7 +253,7 @@
     EmitOMPDepobjDirective(cast<OMPDepobjDirective>(*S));
     break;
   case Stmt::OMPScanDirectiveClass:
-    llvm_unreachable("Scan directive not supported yet.");
+    EmitOMPScanDirective(cast<OMPScanDirective>(*S));
     break;
   case Stmt::OMPOrderedDirectiveClass:
     EmitOMPOrderedDirective(cast<OMPOrderedDirective>(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ae4e340..2395230 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1161,7 +1161,7 @@
 
 void CodeGenFunction::EmitOMPReductionClauseInit(
     const OMPExecutableDirective &D,
-    CodeGenFunction::OMPPrivateScope &PrivateScope) {
+    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
   if (!HaveInsertPoint())
     return;
   SmallVector<const Expr *, 4> Shareds;
@@ -1173,6 +1173,8 @@
   SmallVector<const Expr *, 4> TaskLHSs;
   SmallVector<const Expr *, 4> TaskRHSs;
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
+      continue;
     Shareds.append(C->varlist_begin(), C->varlist_end());
     Privates.append(C->privates().begin(), C->privates().end());
     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
@@ -1387,6 +1389,9 @@
   bool HasAtLeastOneReduction = false;
   bool IsReductionWithTaskMod = false;
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+    // Do not emit for inscan reductions.
+    if (C->getModifier() == OMPC_REDUCTION_inscan)
+      continue;
     HasAtLeastOneReduction = true;
     Privates.append(C->privates().begin(), C->privates().end());
     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
@@ -1705,6 +1710,24 @@
                          getProfileCount(D.getBody()));
     EmitBlock(NextBB);
   }
+
+  OMPPrivateScope InscanScope(*this);
+  EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
+  bool IsInscanRegion = InscanScope.Privatize();
+  if (IsInscanRegion) {
+    // Need to remember the block before and after scan directive
+    // to dispatch them correctly depending on the clause used in
+    // this directive, inclusive or exclusive. For inclusive scan the natural
+    // order of the blocks is used, for exclusive clause the blocks must be
+    // executed in reverse order.
+    OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
+    OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
+    OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
+    OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
+    EmitBranch(OMPScanDispatch);
+    EmitBlock(OMPBeforeScanBlock);
+  }
+
   // Emit loop variables for C++ range loops.
   const Stmt *Body =
       D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
@@ -1714,6 +1737,10 @@
                Body, /*TryImperfectlyNestedLoops=*/true),
            D.getCollapsedNumber());
 
+  // Jump to the dispatcher at the end of the loop body.
+  if (IsInscanRegion)
+    EmitBranch(OMPScanExitBlock);
+
   // The end (updates/cleanups).
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
@@ -2979,14 +3006,217 @@
   return {LBVal, UBVal};
 }
 
+/// Emits the code for the directive with inscan reductions.
+/// The code is the following:
+/// \code
+/// size num_iters = <num_iters>;
+/// <type> buffer[num_iters];
+/// #pragma omp ...
+/// for (i: 0..<num_iters>) {
+///   <input phase>;
+///   buffer[i] = red;
+/// }
+/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
+/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
+///   buffer[i] op= buffer[i-pow(2,k)];
+/// #pragma omp ...
+/// for (0..<num_iters>) {
+///   red = InclusiveScan ? buffer[i] : buffer[i-1];
+///   <scan phase>;
+/// }
+/// \endcode
+static void emitScanBasedDirective(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
+    llvm::function_ref<void(CodeGenFunction &)> FirstGen,
+    llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+  SmallVector<const Expr *, 4> Shareds;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> ReductionOps;
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> CopyOps;
+  SmallVector<const Expr *, 4> CopyArrayTemps;
+  SmallVector<const Expr *, 4> CopyArrayElems;
+  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+           "Only inscan reductions are expected.");
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+    CopyArrayTemps.append(C->copy_array_temps().begin(),
+                          C->copy_array_temps().end());
+    CopyArrayElems.append(C->copy_array_elems().begin(),
+                          C->copy_array_elems().end());
+  }
+  {
+    // Emit buffers for each reduction variables.
+    // ReductionCodeGen is required to emit correctly the code for array
+    // reductions.
+    ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
+    unsigned Count = 0;
+    auto *ITA = CopyArrayTemps.begin();
+    for (const Expr *IRef : Privates) {
+      const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+      // Emit variably modified arrays, used for arrays/array sections
+      // reductions.
+      if (PrivateVD->getType()->isVariablyModifiedType()) {
+        RedCG.emitSharedOrigLValue(CGF, Count);
+        RedCG.emitAggregateType(CGF, Count);
+      }
+      CodeGenFunction::OpaqueValueMapping DimMapping(
+          CGF,
+          cast<OpaqueValueExpr>(
+              cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
+                  ->getSizeExpr()),
+          RValue::get(OMPScanNumIterations));
+      // Emit temp buffer.
+      CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
+      ++ITA;
+      ++Count;
+    }
+  }
+  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
+  {
+    // Emit loop with input phase:
+    // #pragma omp ...
+    // for (i: 0..<num_iters>) {
+    //   <input phase>;
+    //   buffer[i] = red;
+    // }
+    CGF.OMPFirstScanLoop = true;
+    CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+    FirstGen(CGF);
+  }
+  // Emit prefix reduction:
+  // for (int k = 0; k <= ceil(log2(n)); ++k)
+  llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
+  llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
+  llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
+  llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
+  llvm::Value *Arg =
+      CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
+  llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
+  F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
+  LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
+  LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
+  llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
+      OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
+  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
+  CGF.EmitBlock(LoopBB);
+  auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
+  // size pow2k = 1;
+  auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+  Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
+  Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
+  // for (size i = n - 1; i >= 2 ^ k; --i)
+  //   tmp[i] op= tmp[i-pow2k];
+  llvm::BasicBlock *InnerLoopBB =
+      CGF.createBasicBlock("omp.inner.log.scan.body");
+  llvm::BasicBlock *InnerExitBB =
+      CGF.createBasicBlock("omp.inner.log.scan.exit");
+  llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
+  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+  CGF.EmitBlock(InnerLoopBB);
+  auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+  IVal->addIncoming(NMin1, LoopBB);
+  {
+    CodeGenFunction::OMPPrivateScope PrivScope(CGF);
+    auto *ILHS = LHSs.begin();
+    auto *IRHS = RHSs.begin();
+    for (const Expr *CopyArrayElem : CopyArrayElems) {
+      const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+      const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+      Address LHSAddr = Address::invalid();
+      {
+        CodeGenFunction::OpaqueValueMapping IdxMapping(
+            CGF,
+            cast<OpaqueValueExpr>(
+                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+            RValue::get(IVal));
+        LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+      }
+      PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
+      Address RHSAddr = Address::invalid();
+      {
+        llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
+        CodeGenFunction::OpaqueValueMapping IdxMapping(
+            CGF,
+            cast<OpaqueValueExpr>(
+                cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+            RValue::get(OffsetIVal));
+        RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+      }
+      PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
+      ++ILHS;
+      ++IRHS;
+    }
+    PrivScope.Privatize();
+    CGF.CGM.getOpenMPRuntime().emitReduction(
+        CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+        {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
+  }
+  llvm::Value *NextIVal =
+      CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
+  IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
+  CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
+  CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+  CGF.EmitBlock(InnerExitBB);
+  llvm::Value *Next =
+      CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
+  Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
+  // pow2k <<= 1;
+  llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+  Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
+  llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
+  CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+  auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
+  CGF.EmitBlock(ExitBB);
+
+  CGF.OMPFirstScanLoop = false;
+  SecondGen(CGF);
+}
+
 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
   bool HasLastprivates = false;
   auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
                                           PrePostActionTy &) {
-    OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
-    HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
-                                                 emitForLoopBounds,
-                                                 emitDispatchForLoopBounds);
+    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+                     [](const OMPReductionClause *C) {
+                       return C->getModifier() == OMPC_REDUCTION_inscan;
+                     })) {
+      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+        OMPLocalDeclMapRAII Scope(CGF);
+        OMPLoopScope LoopScope(CGF, S);
+        return CGF.EmitScalarExpr(S.getNumIterations());
+      };
+      const auto &&FirstGen = [&S](CodeGenFunction &CGF) {
+        OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
+        (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+                                         emitForLoopBounds,
+                                         emitDispatchForLoopBounds);
+        // Emit an implicit barrier at the end.
+        CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
+                                                   OMPD_for);
+      };
+      const auto &&SecondGen = [&S, &HasLastprivates](CodeGenFunction &CGF) {
+        OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
+        HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+                                                     emitForLoopBounds,
+                                                     emitDispatchForLoopBounds);
+      };
+      emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+    } else {
+      OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
+      HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+                                                   emitForLoopBounds,
+                                                   emitDispatchForLoopBounds);
+    }
   };
   {
     auto LPCRegion =
@@ -3961,6 +4191,112 @@
   }
 }
 
+void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
+  // Do not emit code for non-simd directives in simd-only mode.
+  if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan)
+    return;
+  const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
+  SmallVector<const Expr *, 4> Shareds;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> CopyOps;
+  SmallVector<const Expr *, 4> CopyArrayTemps;
+  SmallVector<const Expr *, 4> CopyArrayElems;
+  for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
+    if (C->getModifier() != OMPC_REDUCTION_inscan)
+      continue;
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+    CopyArrayTemps.append(C->copy_array_temps().begin(),
+                          C->copy_array_temps().end());
+    CopyArrayElems.append(C->copy_array_elems().begin(),
+                          C->copy_array_elems().end());
+  }
+  bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
+  if (!IsInclusive) {
+    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+    EmitBlock(OMPScanExitBlock);
+  }
+  if (OMPFirstScanLoop) {
+    // Emit buffer[i] = red; at the end of the input phase.
+    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
+                             .getIterationVariable()
+                             ->IgnoreParenImpCasts();
+    LValue IdxLVal = EmitLValue(IVExpr);
+    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
+    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
+    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+      const Expr *PrivateExpr = Privates[I];
+      const Expr *OrigExpr = Shareds[I];
+      const Expr *CopyArrayElem = CopyArrayElems[I];
+      OpaqueValueMapping IdxMapping(
+          *this,
+          cast<OpaqueValueExpr>(
+              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+          RValue::get(IdxVal));
+      LValue DestLVal = EmitLValue(CopyArrayElem);
+      LValue SrcLVal = EmitLValue(OrigExpr);
+      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+                  SrcLVal.getAddress(*this),
+                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                  CopyOps[I]);
+    }
+  }
+  EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+  if (IsInclusive) {
+    EmitBlock(OMPScanExitBlock);
+    EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+  }
+  EmitBlock(OMPScanDispatch);
+  if (!OMPFirstScanLoop) {
+    // Emit red = buffer[i]; at the entrance to the scan phase.
+    const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
+                             .getIterationVariable()
+                             ->IgnoreParenImpCasts();
+    LValue IdxLVal = EmitLValue(IVExpr);
+    llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
+    IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
+    llvm::BasicBlock *ExclusiveExitBB = nullptr;
+    if (!IsInclusive) {
+      llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
+      ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
+      llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
+      Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
+      EmitBlock(ContBB);
+      // Use idx - 1 iteration for exclusive scan.
+      IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
+    }
+    for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+      const Expr *PrivateExpr = Privates[I];
+      const Expr *OrigExpr = Shareds[I];
+      const Expr *CopyArrayElem = CopyArrayElems[I];
+      OpaqueValueMapping IdxMapping(
+          *this,
+          cast<OpaqueValueExpr>(
+              cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+          RValue::get(IdxVal));
+      LValue SrcLVal = EmitLValue(CopyArrayElem);
+      LValue DestLVal = EmitLValue(OrigExpr);
+      EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+                  SrcLVal.getAddress(*this),
+                  cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                  cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                  CopyOps[I]);
+    }
+    if (!IsInclusive) {
+      EmitBlock(ExclusiveExitBB);
+    }
+  }
+  EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
+                                               : OMPAfterScanBlock);
+  EmitBlock(OMPAfterScanBlock);
+}
+
 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                                             const CodeGenLoopTy &CodeGenLoop,
                                             Expr *IncExpr) {
@@ -5950,6 +6286,10 @@
 
 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
     const OMPExecutableDirective &D) {
+  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
+    EmitOMPScanDirective(*SD);
+    return;
+  }
   if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
     return;
   auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e3dd462..2b0ebad 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -673,6 +673,32 @@
 
   llvm::BasicBlock *getInvokeDestImpl();
 
+  /// Parent loop-based directive for scan directive.
+  const OMPExecutableDirective *OMPParentLoopDirectiveForScan = nullptr;
+  llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+  llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+  llvm::BasicBlock *OMPScanExitBlock = nullptr;
+  llvm::BasicBlock *OMPScanDispatch = nullptr;
+  bool OMPFirstScanLoop = false;
+
+  /// Manages parent directive for scan directives.
+  class ParentLoopDirectiveForScanRegion {
+    CodeGenFunction &CGF;
+    const OMPExecutableDirective &ParentLoopDirectiveForScan;
+
+  public:
+    ParentLoopDirectiveForScanRegion(
+        CodeGenFunction &CGF,
+        const OMPExecutableDirective &ParentLoopDirectiveForScan)
+        : CGF(CGF),
+          ParentLoopDirectiveForScan(*CGF.OMPParentLoopDirectiveForScan) {
+      CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
+    }
+    ~ParentLoopDirectiveForScanRegion() {
+      CGF.OMPParentLoopDirectiveForScan = &ParentLoopDirectiveForScan;
+    }
+  };
+
   template <class T>
   typename DominatingValue<T>::saved_type saveValueInCond(T value) {
     return DominatingValue<T>::save(*this, value);
@@ -3201,7 +3227,8 @@
   /// proper codegen in internal captured statement.
   ///
   void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
-                                  OMPPrivateScope &PrivateScope);
+                                  OMPPrivateScope &PrivateScope,
+                                  bool ForInscan = false);
   /// Emit final update of reduction values to original variables at
   /// the end of the directive.
   ///
@@ -3260,6 +3287,7 @@
   void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S);
   void EmitOMPFlushDirective(const OMPFlushDirective &S);
   void EmitOMPDepobjDirective(const OMPDepobjDirective &S);
+  void EmitOMPScanDirective(const OMPScanDirective &S);
   void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
   void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
   void EmitOMPTargetDirective(const OMPTargetDirective &S);