| //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This contains code to emit OpenMP nodes as LLVM code. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "CGOpenMPRuntime.h" |
| #include "CodeGenFunction.h" |
| #include "CodeGenModule.h" |
| #include "TargetInfo.h" |
| #include "clang/AST/Stmt.h" |
| #include "clang/AST/StmtOpenMP.h" |
| using namespace clang; |
| using namespace CodeGen; |
| |
| //===----------------------------------------------------------------------===// |
| // OpenMP Directive Emission |
| //===----------------------------------------------------------------------===// |
| namespace { |
| /// \brief RAII for inlined OpenMP regions (like 'omp for', 'omp simd', 'omp |
| /// critical' etc.). Helps to generate proper debug info and provides correct |
| /// code generation for such constructs. |
| class InlinedOpenMPRegionScopeRAII { |
| InlinedOpenMPRegionRAII Region; |
| CodeGenFunction::LexicalScope DirectiveScope; |
| |
| public: |
| InlinedOpenMPRegionScopeRAII(CodeGenFunction &CGF, |
| const OMPExecutableDirective &D) |
| : Region(CGF, D), DirectiveScope(CGF, D.getSourceRange()) {} |
| }; |
| } // namespace |
| |
| /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen |
| /// function. Here is the logic: |
| /// if (Cond) { |
| /// CodeGen(true); |
| /// } else { |
| /// CodeGen(false); |
| /// } |
| static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, |
| const std::function<void(bool)> &CodeGen) { |
| CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); |
| |
| // If the condition constant folds and can be elided, try to avoid emitting |
| // the condition and the dead arm of the if/else. |
| bool CondConstant; |
| if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { |
| CodeGen(CondConstant); |
| return; |
| } |
| |
| // Otherwise, the condition did not fold, or we couldn't elide it. Just |
| // emit the conditional branch. |
| auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then"); |
| auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else"); |
| auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end"); |
| CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0); |
| |
| // Emit the 'then' code. |
| CGF.EmitBlock(ThenBlock); |
| CodeGen(/*ThenBlock*/ true); |
| CGF.EmitBranch(ContBlock); |
| // Emit the 'else' code if present. |
| { |
| // There is no need to emit line number for unconditional branch. |
| auto NL = ApplyDebugLocation::CreateEmpty(CGF); |
| CGF.EmitBlock(ElseBlock); |
| } |
| CodeGen(/*ThenBlock*/ false); |
| { |
| // There is no need to emit line number for unconditional branch. |
| auto NL = ApplyDebugLocation::CreateEmpty(CGF); |
| CGF.EmitBranch(ContBlock); |
| } |
| // Emit the continuation block for code after the if. |
| CGF.EmitBlock(ContBlock, /*IsFinished*/ true); |
| } |
| |
| void CodeGenFunction::EmitOMPAggregateAssign(LValue OriginalAddr, |
| llvm::Value *PrivateAddr, |
| const Expr *AssignExpr, |
| QualType OriginalType, |
| const VarDecl *VDInit) { |
| EmitBlock(createBasicBlock(".omp.assign.begin.")); |
| if (!isa<CXXConstructExpr>(AssignExpr) || isTrivialInitializer(AssignExpr)) { |
| // Perform simple memcpy. |
| EmitAggregateAssign(PrivateAddr, OriginalAddr.getAddress(), |
| AssignExpr->getType()); |
| } else { |
| // Perform element-by-element initialization. |
| QualType ElementTy; |
| auto SrcBegin = OriginalAddr.getAddress(); |
| auto DestBegin = PrivateAddr; |
| auto ArrayTy = OriginalType->getAsArrayTypeUnsafe(); |
| auto SrcNumElements = emitArrayLength(ArrayTy, ElementTy, SrcBegin); |
| auto DestNumElements = emitArrayLength(ArrayTy, ElementTy, DestBegin); |
| auto SrcEnd = Builder.CreateGEP(SrcBegin, SrcNumElements); |
| auto DestEnd = Builder.CreateGEP(DestBegin, DestNumElements); |
| // The basic structure here is a do-while loop, because we don't |
| // need to check for the zero-element case. |
| auto BodyBB = createBasicBlock("omp.arraycpy.body"); |
| auto DoneBB = createBasicBlock("omp.arraycpy.done"); |
| auto IsEmpty = |
| Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); |
| Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
| |
| // Enter the loop body, making that address the current address. |
| auto EntryBB = Builder.GetInsertBlock(); |
| EmitBlock(BodyBB); |
| auto SrcElementPast = Builder.CreatePHI(SrcBegin->getType(), 2, |
| "omp.arraycpy.srcElementPast"); |
| SrcElementPast->addIncoming(SrcEnd, EntryBB); |
| auto DestElementPast = Builder.CreatePHI(DestBegin->getType(), 2, |
| "omp.arraycpy.destElementPast"); |
| DestElementPast->addIncoming(DestEnd, EntryBB); |
| |
| // Shift the address back by one element. |
| auto NegativeOne = llvm::ConstantInt::get(SizeTy, -1, true); |
| auto DestElement = Builder.CreateGEP(DestElementPast, NegativeOne, |
| "omp.arraycpy.dest.element"); |
| auto SrcElement = Builder.CreateGEP(SrcElementPast, NegativeOne, |
| "omp.arraycpy.src.element"); |
| { |
| // Create RunCleanScope to cleanup possible temps. |
| CodeGenFunction::RunCleanupsScope Init(*this); |
| // Emit initialization for single element. |
| LocalDeclMap[VDInit] = SrcElement; |
| EmitAnyExprToMem(AssignExpr, DestElement, |
| AssignExpr->getType().getQualifiers(), |
| /*IsInitializer*/ false); |
| LocalDeclMap.erase(VDInit); |
| } |
| |
| // Check whether we've reached the end. |
| auto Done = |
| Builder.CreateICmpEQ(DestElement, DestBegin, "omp.arraycpy.done"); |
| Builder.CreateCondBr(Done, DoneBB, BodyBB); |
| DestElementPast->addIncoming(DestElement, Builder.GetInsertBlock()); |
| SrcElementPast->addIncoming(SrcElement, Builder.GetInsertBlock()); |
| |
| // Done. |
| EmitBlock(DoneBB, true); |
| } |
| EmitBlock(createBasicBlock(".omp.assign.end.")); |
| } |
| |
| void CodeGenFunction::EmitOMPFirstprivateClause( |
| const OMPExecutableDirective &D, |
| CodeGenFunction::OMPPrivateScope &PrivateScope) { |
| auto PrivateFilter = [](const OMPClause *C) -> bool { |
| return C->getClauseKind() == OMPC_firstprivate; |
| }; |
| for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)> |
| I(D.clauses(), PrivateFilter); I; ++I) { |
| auto *C = cast<OMPFirstprivateClause>(*I); |
| auto IRef = C->varlist_begin(); |
| auto InitsRef = C->inits().begin(); |
| for (auto IInit : C->private_copies()) { |
| auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
| auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
| bool IsRegistered; |
| if (*InitsRef != nullptr) { |
| // Emit VarDecl with copy init for arrays. |
| auto *FD = CapturedStmtInfo->lookup(OrigVD); |
| LValue Base = MakeNaturalAlignAddrLValue( |
| CapturedStmtInfo->getContextValue(), |
| getContext().getTagDeclType(FD->getParent())); |
| auto OriginalAddr = EmitLValueForField(Base, FD); |
| auto VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl()); |
| IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { |
| auto Emission = EmitAutoVarAlloca(*VD); |
| // Emit initialization of aggregate firstprivate vars. |
| EmitOMPAggregateAssign(OriginalAddr, Emission.getAllocatedAddress(), |
| VD->getInit(), (*IRef)->getType(), VDInit); |
| EmitAutoVarCleanups(Emission); |
| return Emission.getAllocatedAddress(); |
| }); |
| } else |
| IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { |
| // Emit private VarDecl with copy init. |
| EmitDecl(*VD); |
| return GetAddrOfLocalVar(VD); |
| }); |
| assert(IsRegistered && "firstprivate var already registered as private"); |
| // Silence the warning about unused variable. |
| (void)IsRegistered; |
| ++IRef, ++InitsRef; |
| } |
| } |
| } |
| |
| void CodeGenFunction::EmitOMPPrivateClause( |
| const OMPExecutableDirective &D, |
| CodeGenFunction::OMPPrivateScope &PrivateScope) { |
| auto PrivateFilter = [](const OMPClause *C) -> bool { |
| return C->getClauseKind() == OMPC_private; |
| }; |
| for (OMPExecutableDirective::filtered_clause_iterator<decltype(PrivateFilter)> |
| I(D.clauses(), PrivateFilter); I; ++I) { |
| auto *C = cast<OMPPrivateClause>(*I); |
| auto IRef = C->varlist_begin(); |
| for (auto IInit : C->private_copies()) { |
| auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
| auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); |
| bool IsRegistered = |
| PrivateScope.addPrivate(OrigVD, [&]() -> llvm::Value * { |
| // Emit private VarDecl with copy init. |
| EmitDecl(*VD); |
| return GetAddrOfLocalVar(VD); |
| }); |
| assert(IsRegistered && "private var already registered as private"); |
| // Silence the warning about unused variable. |
| (void)IsRegistered; |
| ++IRef; |
| } |
| } |
| } |
| |
| /// \brief Emits code for OpenMP parallel directive in the parallel region. |
| static void EmitOMPParallelCall(CodeGenFunction &CGF, |
| const OMPParallelDirective &S, |
| llvm::Value *OutlinedFn, |
| llvm::Value *CapturedStruct) { |
| if (auto C = S.getSingleClause(/*K*/ OMPC_num_threads)) { |
| CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
| auto NumThreadsClause = cast<OMPNumThreadsClause>(C); |
| auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), |
| /*IgnoreResultAssign*/ true); |
| CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
| CGF, NumThreads, NumThreadsClause->getLocStart()); |
| } |
| CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, |
| CapturedStruct); |
| } |
| |
| void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
| auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); |
| auto CapturedStruct = GenerateCapturedStmtArgument(*CS); |
| auto OutlinedFn = CGM.getOpenMPRuntime().emitOutlinedFunction( |
| S, *CS->getCapturedDecl()->param_begin()); |
| if (auto C = S.getSingleClause(/*K*/ OMPC_if)) { |
| auto Cond = cast<OMPIfClause>(C)->getCondition(); |
| EmitOMPIfClause(*this, Cond, [&](bool ThenBlock) { |
| if (ThenBlock) |
| EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); |
| else |
| CGM.getOpenMPRuntime().emitSerialCall(*this, S.getLocStart(), |
| OutlinedFn, CapturedStruct); |
| }); |
| } else |
| EmitOMPParallelCall(*this, S, OutlinedFn, CapturedStruct); |
| } |
| |
| void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, |
| bool SeparateIter) { |
| RunCleanupsScope BodyScope(*this); |
| // Update counters values on current iteration. |
| for (auto I : S.updates()) { |
| EmitIgnoredExpr(I); |
| } |
| // Update the linear variables. |
| for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { |
| for (auto U : C->updates()) { |
| EmitIgnoredExpr(U); |
| } |
| } |
| |
| // On a continue in the body, jump to the end. |
| auto Continue = getJumpDestInCurrentScope("omp.body.continue"); |
| BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue)); |
| // Emit loop body. |
| EmitStmt(S.getBody()); |
| // The end (updates/cleanups). |
| EmitBlock(Continue.getBlock()); |
| BreakContinueStack.pop_back(); |
| if (SeparateIter) { |
| // TODO: Update lastprivates if the SeparateIter flag is true. |
| // This will be implemented in a follow-up OMPLastprivateClause patch, but |
| // result should be still correct without it, as we do not make these |
| // variables private yet. |
| } |
| } |
| |
| void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup, |
| const Expr *LoopCond, |
| const Expr *IncExpr, |
| const std::function<void()> &BodyGen) { |
| auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); |
| auto Cnt = getPGORegionCounter(&S); |
| |
| // Start the loop with a block that tests the condition. |
| auto CondBlock = createBasicBlock("omp.inner.for.cond"); |
| EmitBlock(CondBlock); |
| LoopStack.push(CondBlock); |
| |
| // If there are any cleanups between here and the loop-exit scope, |
| // create a block to stage a loop exit along. |
| auto ExitBlock = LoopExit.getBlock(); |
| if (RequiresCleanup) |
| ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); |
| |
| auto LoopBody = createBasicBlock("omp.inner.for.body"); |
| |
| // Emit condition. |
| EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount()); |
| if (ExitBlock != LoopExit.getBlock()) { |
| EmitBlock(ExitBlock); |
| EmitBranchThroughCleanup(LoopExit); |
| } |
| |
| EmitBlock(LoopBody); |
| Cnt.beginRegion(Builder); |
| |
| // Create a block for the increment. |
| auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); |
| BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); |
| |
| BodyGen(); |
| |
| // Emit "IV = IV + 1" and a back-edge to the condition block. |
| EmitBlock(Continue.getBlock()); |
| EmitIgnoredExpr(IncExpr); |
| BreakContinueStack.pop_back(); |
| EmitBranch(CondBlock); |
| LoopStack.pop(); |
| // Emit the fall-through block. |
| EmitBlock(LoopExit.getBlock()); |
| } |
| |
| void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) { |
| auto IC = S.counters().begin(); |
| for (auto F : S.finals()) { |
| if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) { |
| EmitIgnoredExpr(F); |
| } |
| ++IC; |
| } |
| // Emit the final values of the linear variables. |
| for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) { |
| for (auto F : C->finals()) { |
| EmitIgnoredExpr(F); |
| } |
| } |
| } |
| |
| static void EmitOMPAlignedClause(CodeGenFunction &CGF, CodeGenModule &CGM, |
| const OMPAlignedClause &Clause) { |
| unsigned ClauseAlignment = 0; |
| if (auto AlignmentExpr = Clause.getAlignment()) { |
| auto AlignmentCI = |
| cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); |
| ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue()); |
| } |
| for (auto E : Clause.varlists()) { |
| unsigned Alignment = ClauseAlignment; |
| if (Alignment == 0) { |
| // OpenMP [2.8.1, Description] |
| // If no optional parameter is specified, implementation-defined default |
| // alignments for SIMD instructions on the target platforms are assumed. |
| Alignment = CGM.getTargetCodeGenInfo().getOpenMPSimdDefaultAlignment( |
| E->getType()); |
| } |
| assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) && |
| "alignment is not power of 2"); |
| if (Alignment != 0) { |
| llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
| CGF.EmitAlignmentAssumption(PtrValue, Alignment); |
| } |
| } |
| } |
| |
| static void EmitPrivateLoopCounters(CodeGenFunction &CGF, |
| CodeGenFunction::OMPPrivateScope &LoopScope, |
| ArrayRef<Expr *> Counters) { |
| for (auto *E : Counters) { |
| auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
| bool IsRegistered = LoopScope.addPrivate(VD, [&]() -> llvm::Value * { |
| // Emit var without initialization. |
| auto VarEmission = CGF.EmitAutoVarAlloca(*VD); |
| CGF.EmitAutoVarCleanups(VarEmission); |
| return VarEmission.getAllocatedAddress(); |
| }); |
| assert(IsRegistered && "counter already registered as private"); |
| // Silence the warning about unused variable. |
| (void)IsRegistered; |
| } |
| } |
| |
| static void |
| EmitPrivateLinearVars(CodeGenFunction &CGF, const OMPExecutableDirective &D, |
| CodeGenFunction::OMPPrivateScope &PrivateScope) { |
| for (auto Clause : OMPExecutableDirective::linear_filter(D.clauses())) { |
| for (auto *E : Clause->varlists()) { |
| auto VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
| bool IsRegistered = PrivateScope.addPrivate(VD, [&]()->llvm::Value * { |
| // Emit var without initialization. |
| auto VarEmission = CGF.EmitAutoVarAlloca(*VD); |
| CGF.EmitAutoVarCleanups(VarEmission); |
| return VarEmission.getAllocatedAddress(); |
| }); |
| assert(IsRegistered && "linear var already registered as private"); |
| // Silence the warning about unused variable. |
| (void)IsRegistered; |
| } |
| } |
| } |
| |
/// \brief Emits code for the 'omp simd' directive \a S.
///
/// The clauses are scanned first (safelen, aligned, lastprivate), then the
/// helper variables (linear inits, iteration variable, iteration count, linear
/// steps) are emitted, and finally the loop itself is emitted inside a private
/// scope, followed by the final counter/linear variable updates.
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
  // Pragma 'simd' code depends on presence of 'lastprivate'.
  // If present, we have to separate last iteration of the loop:
  //
  // if (LastIteration != 0) {
  //   for (IV in 0..LastIteration-1) BODY;
  //   BODY with updates of lastprivate vars;
  //   <Final counter/linear vars updates>;
  // }
  //
  // otherwise (when there's no lastprivate):
  //
  //   for (IV in 0..LastIteration) BODY;
  //   <Final counter/linear vars updates>;
  //

  // Walk clauses and process safelen/lastprivate.
  bool SeparateIter = false;
  // Defaults: the loop is marked parallel and vectorization is enabled; a
  // 'safelen' clause below withdraws the parallel marking again.
  LoopStack.setParallel();
  LoopStack.setVectorizerEnable(true);
  for (auto C : S.clauses()) {
    switch (C->getClauseKind()) {
    case OMPC_safelen: {
      RValue Len = EmitAnyExpr(cast<OMPSafelenClause>(C)->getSafelen(),
                               AggValueSlot::ignored(), true);
      // The emitted safelen value is required to be a constant integer here.
      llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
      LoopStack.setVectorizerWidth(Val->getZExtValue());
      // In presence of finite 'safelen', it may be unsafe to mark all
      // the memory instructions parallel, because loop-carried
      // dependences of 'safelen' iterations are possible.
      LoopStack.setParallel(false);
      break;
    }
    case OMPC_aligned:
      EmitOMPAlignedClause(*this, CGM, cast<OMPAlignedClause>(*C));
      break;
    case OMPC_lastprivate:
      // Only recorded here; it selects the separated-last-iteration shape
      // below.
      SeparateIter = true;
      break;
    default:
      // Not handled yet
      ;
    }
  }

  // Everything from here on is emitted inside the inlined-region scope of the
  // directive.
  InlinedOpenMPRegionScopeRAII Region(*this, S);

  // Emit inits for the linear variables.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    for (auto Init : C->inits()) {
      auto *D = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
      EmitVarDecl(*D);
    }
  }

  // Emit the loop iteration variable.
  const Expr *IVExpr = S.getIterationVariable();
  const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
  EmitVarDecl(*IVDecl);
  EmitIgnoredExpr(S.getInit());

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  // Emit the linear steps for the linear clauses.
  // If a step is not constant, it is pre-calculated before the loop.
  for (auto C : OMPExecutableDirective::linear_filter(S.clauses())) {
    if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
      // NOTE(review): cast<> (unlike dyn_cast<>) is not expected to yield
      // null, so this condition looks always-true -- confirm whether
      // dyn_cast<> was intended.
      if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
        EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
        // Emit calculation of the linear step.
        EmitIgnoredExpr(CS);
      }
  }

  if (SeparateIter) {
    // Emit: if (<precondition of S>) - begin.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("simd.if.then");
    auto ContBlock = createBasicBlock("simd.if.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Counters, linear variables and 'private' variables are privatized for
      // the duration of this scope.
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitPrivateLinearVars(*this, S, LoopScope);
      EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                       S.getCond(/*SeparateIter=*/true), S.getInc(),
                       [&S, this]() {
                         EmitOMPLoopBody(S);
                         EmitStopPoint(&S);
                       });
      // The separated last iteration, emitted once after the loop.
      EmitOMPLoopBody(S, /* SeparateIter */ true);
    }
    EmitOMPSimdFinal(S);
    // Emit: if (<precondition of S>) - end.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  } else {
    {
      // Same privatization as above, but the loop covers all iterations and
      // no precondition check is emitted.
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      EmitPrivateLinearVars(*this, S, LoopScope);
      EmitOMPPrivateClause(S, LoopScope);
      (void)LoopScope.Privatize();
      EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                       S.getCond(/*SeparateIter=*/false), S.getInc(),
                       [&S, this]() {
                         EmitOMPLoopBody(S);
                         EmitStopPoint(&S);
                       });
    }
    EmitOMPSimdFinal(S);
  }
}
| |
/// \brief Emits the outer (dispatch) loop of a worksharing loop \a S for every
/// schedule other than static non-chunked.
///
/// \param ScheduleKind Schedule kind taken from the 'schedule' clause.
/// \param S The loop directive being emitted.
/// \param LoopScope Private scope of the loop; only queried for pending
/// cleanups.
/// \param LB, UB, ST, IL Lower bound, upper bound, stride and is-last-iteration
/// helper values forwarded to the runtime calls (the caller in this file
/// passes the addresses of the corresponding helper variables).
/// \param Chunk Chunk size value, or nullptr when no chunk size was given.
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                          const OMPLoopDirective &S,
                                          OMPPrivateScope &LoopScope,
                                          llvm::Value *LB, llvm::Value *UB,
                                          llvm::Value *ST, llvm::Value *IL,
                                          llvm::Value *Chunk) {
  auto &RT = CGM.getOpenMPRuntime();

  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
  const bool Dynamic = RT.isDynamic(ScheduleKind);

  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
         "static non-chunked schedule does not need outer loop");

  // Emit outer loop.
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(dynamic,chunk_size) is specified, the iterations are
  // distributed to threads in the team in chunks as the threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be distributed. Each chunk contains chunk_size
  // iterations, except for the last chunk to be distributed, which may have
  // fewer iterations. When no chunk_size is specified, it defaults to 1.
  //
  // When schedule(guided,chunk_size) is specified, the iterations are assigned
  // to threads in the team in chunks as the executing threads request them.
  // Each thread executes a chunk of iterations, then requests another chunk,
  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
  // each chunk is proportional to the number of unassigned iterations divided
  // by the number of threads in the team, decreasing to 1. For a chunk_size
  // with value k (greater than 1), the size of each chunk is determined in the
  // same way, with the restriction that the chunks do not contain fewer than k
  // iterations (except for the last chunk to be assigned, which may have fewer
  // than k iterations).
  //
  // When schedule(auto) is specified, the decision regarding scheduling is
  // delegated to the compiler and/or runtime system. The programmer gives the
  // implementation the freedom to choose any possible mapping of iterations to
  // threads in the team.
  //
  // When schedule(runtime) is specified, the decision regarding scheduling is
  // deferred until run time, and the schedule and chunk size are taken from the
  // run-sched-var ICV. If the ICV is set to auto, the schedule is
  // implementation defined.
  //
  // while(__kmpc_dispatch_next(&LB, &UB)) {
  //   idx = LB;
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  // }
  //
  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
  // When schedule(static, chunk_size) is specified, iterations are divided into
  // chunks of size chunk_size, and the chunks are assigned to the threads in
  // the team in a round-robin fashion in the order of the thread number.
  //
  // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
  //   while (idx <= UB) { BODY; ++idx; } // inner loop
  //   LB = LB + ST;
  //   UB = UB + ST;
  // }
  //

  const Expr *IVExpr = S.getIterationVariable();
  const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
  const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();

  // For dynamic schedules the value of the last-iteration expression is passed
  // in the upper-bound position; otherwise UB itself is passed.
  RT.emitForInit(
      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
      Chunk);

  auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");

  // Start the loop with a block that tests the condition.
  auto CondBlock = createBasicBlock("omp.dispatch.cond");
  EmitBlock(CondBlock);
  LoopStack.push(CondBlock);

  llvm::Value *BoolCondVal = nullptr;
  if (!Dynamic) {
    // UB = min(UB, GlobalUB)
    EmitIgnoredExpr(S.getEnsureUpperBound());
    // IV = LB
    EmitIgnoredExpr(S.getInit());
    // IV < UB
    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
  } else {
    // Dynamic schedules: the loop condition is the value produced by the
    // runtime's "next chunk" call.
    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
                                 IL, LB, UB, ST);
  }

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  auto ExitBlock = LoopExit.getBlock();
  if (LoopScope.requiresCleanups())
    ExitBlock = createBasicBlock("omp.dispatch.cleanup");

  auto LoopBody = createBasicBlock("omp.dispatch.body");
  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }
  EmitBlock(LoopBody);

  // Emit "IV = LB" (in case of static schedule, we have already calculated new
  // LB for loop condition and emitted it above).
  if (Dynamic)
    EmitIgnoredExpr(S.getInit());

  // Create a block for the increment.
  auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  // Inner loop that processes the current chunk.
  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
                     EmitOMPLoopBody(S);
                     EmitStopPoint(&S);
                   });

  EmitBlock(Continue.getBlock());
  BreakContinueStack.pop_back();
  if (!Dynamic) {
    // Emit "LB = LB + Stride", "UB = UB + Stride".
    EmitIgnoredExpr(S.getNextLowerBound());
    EmitIgnoredExpr(S.getNextUpperBound());
  }

  // Back-edge to the dispatch condition.
  EmitBranch(CondBlock);
  LoopStack.pop();
  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock());

  // Tell the runtime we are done.
  // FIXME: Also call fini for ordered loops with dynamic scheduling.
  if (!Dynamic)
    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
}
| |
| /// \brief Emit a helper variable and return corresponding lvalue. |
| static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
| const DeclRefExpr *Helper) { |
| auto VDecl = cast<VarDecl>(Helper->getDecl()); |
| CGF.EmitVarDecl(*VDecl); |
| return CGF.EmitLValue(Helper); |
| } |
| |
/// \brief Emits the loop of the worksharing directive \a S.
///
/// The iteration variable and (if it is a variable) the iteration count are
/// emitted first. The remainder is guarded by the loop precondition: helper
/// variables are emitted, loop counters are privatized, and then either a
/// single inner loop (static non-chunked schedule) or an outer dispatch loop
/// (every other schedule) is generated.
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
  // Emit the loop iteration variable.
  auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
  auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
  EmitVarDecl(*IVDecl);

  // Emit the iterations count variable.
  // If it is not a variable, Sema decided to calculate iterations count on each
  // iteration (e.g., it is foldable into a constant).
  if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
    EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
    // Emit calculation of the iterations count.
    EmitIgnoredExpr(S.getCalcLastIteration());
  }

  auto &RT = CGM.getOpenMPRuntime();

  // Check pre-condition.
  {
    // Skip the entire loop if we don't meet the precondition.
    RegionCounter Cnt = getPGORegionCounter(&S);
    auto ThenBlock = createBasicBlock("omp.precond.then");
    auto ContBlock = createBasicBlock("omp.precond.end");
    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
    EmitBlock(ThenBlock);
    Cnt.beginRegion(Builder);
    // Emit 'then' code.
    {
      // Emit helper vars inits.
      LValue LB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
      LValue UB =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
      LValue ST =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
      LValue IL =
          EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));

      // Loop counters get private copies for the duration of this scope.
      OMPPrivateScope LoopScope(*this);
      EmitPrivateLoopCounters(*this, LoopScope, S.counters());
      (void)LoopScope.Privatize();

      // Detect the loop schedule kind and chunk.
      auto ScheduleKind = OMPC_SCHEDULE_unknown;
      llvm::Value *Chunk = nullptr;
      if (auto C = cast_or_null<OMPScheduleClause>(
              S.getSingleClause(OMPC_schedule))) {
        ScheduleKind = C->getScheduleKind();
        if (auto Ch = C->getChunkSize()) {
          Chunk = EmitScalarExpr(Ch);
          // The chunk value is converted to the type of the iteration
          // variable.
          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
                                       S.getIterationVariable()->getType());
        }
      }
      const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
      const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
      if (RT.isStaticNonchunked(ScheduleKind,
                                /* Chunked */ Chunk != nullptr)) {
        // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
        // When no chunk_size is specified, the iteration space is divided into
        // chunks that are approximately equal in size, and at most one chunk is
        // distributed to each thread. Note that the size of the chunks is
        // unspecified in this case.
        RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
                       ST.getAddress());
        // UB = min(UB, GlobalUB);
        EmitIgnoredExpr(S.getEnsureUpperBound());
        // IV = LB;
        EmitIgnoredExpr(S.getInit());
        // while (idx <= UB) { BODY; ++idx; }
        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
                         S.getCond(/*SeparateIter=*/false), S.getInc(),
                         [&S, this]() {
                           EmitOMPLoopBody(S);
                           EmitStopPoint(&S);
                         });
        // Tell the runtime we are done.
        RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
      } else {
        // Emit the outer loop, which requests its work chunk [LB..UB] from
        // runtime and runs the inner loop to process it.
        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
                            Chunk);
      }
    }
    // We're now done with the loop, so jump to the continuation block.
    EmitBranch(ContBlock);
    EmitBlock(ContBlock, true);
  }
}
| |
| void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { |
| InlinedOpenMPRegionScopeRAII Region(*this, S); |
| |
| EmitOMPWorksharingLoop(S); |
| |
| // Emit an implicit barrier at the end. |
| CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), |
| /*IsExplicit*/ false); |
| } |
| |
/// \brief Placeholder for the 'omp for simd' directive. Code generation for it
/// is not implemented; reaching this function is treated as unreachable.
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
  llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
}
| |
| static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
| const Twine &Name, |
| llvm::Value *Init = nullptr) { |
| auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty); |
| if (Init) |
| CGF.EmitScalarInit(Init, LVal); |
| return LVal; |
| } |
| |
| /// \brief Emits code for the 'omp sections' directive. |
| /// |
| /// When the captured statement is a compound statement with more than one |
| /// child, the children are distributed through a statically scheduled loop |
| /// over the indices [0, <number of children> - 1]; every iteration selects its |
| /// child with a switch on the loop counter. Otherwise the captured statement |
| /// is emitted as a single region. An implicit barrier follows unless the |
| /// directive has a 'nowait' clause. |
| void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
| InlinedOpenMPRegionScopeRAII Region(*this, S); |
| |
| // Body of the directive as captured by Sema. |
| auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); |
| auto *CS = dyn_cast<CompoundStmt>(Stmt); |
| if (CS && CS->size() > 1) { |
| auto &C = CGM.getContext(); |
| auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
| // Emit helper vars inits. |
| LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.", |
| Builder.getInt32(0)); |
| // Index of the last child; also used below to clamp the upper bound. |
| auto *GlobalUBVal = Builder.getInt32(CS->size() - 1); |
| LValue UB = |
| createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal); |
| LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.", |
| Builder.getInt32(1)); |
| LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.", |
| Builder.getInt32(0)); |
| // Loop counter. |
| LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv."); |
| // Opaque value expressions stand in for the IV and UB helper variables so |
| // that the loop condition and increment can be built as AST nodes and |
| // handed to EmitOMPInnerLoop. |
| OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); |
| OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV); |
| OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue); |
| OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB); |
| // Generate condition for loop. |
| BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, |
| OK_Ordinary, S.getLocStart(), /*fpContractable=*/false); |
| // Increment for loop counter. |
| UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, |
| S.getLocStart()); |
| // Loop body generator, invoked by EmitOMPInnerLoop. |
| auto BodyGen = [this, CS, &S, &IV]() { |
| // Iterate through all sections and emit a switch construct: |
| // switch (IV) { |
| // case 0: |
| // <SectionStmt[0]>; |
| // break; |
| // ... |
| // case <NumSection> - 1: |
| // <SectionStmt[<NumSection> - 1]>; |
| // break; |
| // } |
| // .omp.sections.exit: |
| auto *ExitBB = createBasicBlock(".omp.sections.exit"); |
| // ExitBB doubles as the default destination of the switch. |
| auto *SwitchStmt = Builder.CreateSwitch( |
| EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB, |
| CS->size()); |
| unsigned CaseNumber = 0; |
| // One case block per child statement, numbered in source order. |
| for (auto C = CS->children(); C; ++C, ++CaseNumber) { |
| auto CaseBB = createBasicBlock(".omp.sections.case"); |
| EmitBlock(CaseBB); |
| SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); |
| EmitStmt(*C); |
| EmitBranch(ExitBB); |
| } |
| EmitBlock(ExitBB, /*IsFinished=*/true); |
| }; |
| // Emit static non-chunked loop. |
| CGM.getOpenMPRuntime().emitForInit( |
| *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32, |
| /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(), |
| ST.getAddress()); |
| // UB = min(UB, GlobalUB); |
| auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart()); |
| auto *MinUBGlobalUB = Builder.CreateSelect( |
| Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal); |
| EmitStoreOfScalar(MinUBGlobalUB, UB); |
| // IV = LB; |
| EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV); |
| // while (idx <= UB) { BODY; ++idx; } |
| EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen); |
| // Tell the runtime we are done. |
| CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(), |
| OMPC_SCHEDULE_static); |
| } else { |
| // If only one section is found - no need to generate loop, emit as a single |
| // region. |
| // No 'copyprivate' data is involved, hence the four empty lists. |
| CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { |
| InlinedOpenMPRegionScopeRAII Region(*this, S); |
| EmitStmt(Stmt); |
| EnsureInsertPoint(); |
| }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None); |
| } |
| |
| // Emit an implicit barrier at the end. |
| if (!S.getSingleClause(OMPC_nowait)) |
| CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), |
| /*IsExplicit=*/false); |
| } |
| |
| /// \brief Emits the statement captured by an 'omp section' directive inline, |
| /// inside its own inlined-region scope. |
| void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
| InlinedOpenMPRegionScopeRAII SectionScope(*this, S); |
| auto *Captured = cast<CapturedStmt>(S.getAssociatedStmt()); |
| EmitStmt(Captured->getCapturedStmt()); |
| // Keep a valid insertion point for whatever is emitted next. |
| EnsureInsertPoint(); |
| } |
| |
| /// \brief Emits code for the 'omp single' directive: the captured statement is |
| /// emitted as a single region together with the data of all 'copyprivate' |
| /// clauses, followed by an implicit barrier unless 'nowait' is specified. |
| void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
| // Variables named in 'copyprivate' clauses plus their helper expressions |
| // (<source>, <destination>, <destination>=<source>). |
| llvm::SmallVector<const Expr *, 8> Vars; |
| llvm::SmallVector<const Expr *, 8> Sources; |
| llvm::SmallVector<const Expr *, 8> Destinations; |
| llvm::SmallVector<const Expr *, 8> Assignments; |
| // Predicate selecting the 'copyprivate' clauses of the directive. |
| auto IsCopyprivate = [](const OMPClause *Clause) -> bool { |
| return Clause->getClauseKind() == OMPC_copyprivate; |
| }; |
| using CopyprivateClauseIter = |
| OMPExecutableDirective::filtered_clause_iterator<decltype(IsCopyprivate)>; |
| // Concatenate the lists of every matching clause, in clause order. |
| for (CopyprivateClauseIter It(S.clauses(), IsCopyprivate); It; ++It) { |
| auto *Clause = cast<OMPCopyprivateClause>(*It); |
| Vars.append(Clause->varlists().begin(), Clause->varlists().end()); |
| Sources.append(Clause->source_exprs().begin(), |
| Clause->source_exprs().end()); |
| Destinations.append(Clause->destination_exprs().begin(), |
| Clause->destination_exprs().end()); |
| Assignments.append(Clause->assignment_ops().begin(), |
| Clause->assignment_ops().end()); |
| } |
| // Generator for the body of the 'single' region. |
| auto &&BodyGen = [&]() -> void { |
| InlinedOpenMPRegionScopeRAII SingleScope(*this, S); |
| EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); |
| EnsureInsertPoint(); |
| }; |
| // Emit code for 'single' region along with 'copyprivate' clauses |
| CGM.getOpenMPRuntime().emitSingleRegion(*this, BodyGen, S.getLocStart(), Vars, |
| Sources, Destinations, Assignments); |
| // The implicit barrier at the end is dropped by 'nowait'. |
| const bool HasNowait = S.getSingleClause(OMPC_nowait) != nullptr; |
| if (!HasNowait) |
| CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), |
| /*IsExplicit=*/false); |
| } |
| |
| /// \brief Emits code for the 'omp master' directive by handing a generator for |
| /// the captured statement to the runtime's master-region emitter. |
| void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
| // Generator for the body of the 'master' region. |
| auto &&BodyGen = [&]() -> void { |
| InlinedOpenMPRegionScopeRAII MasterScope(*this, S); |
| auto *Captured = cast<CapturedStmt>(S.getAssociatedStmt()); |
| EmitStmt(Captured->getCapturedStmt()); |
| EnsureInsertPoint(); |
| }; |
| CGM.getOpenMPRuntime().emitMasterRegion(*this, BodyGen, S.getLocStart()); |
| } |
| |
| /// \brief Emits code for the 'omp critical' directive: the captured statement |
| /// is emitted through the runtime's critical-region emitter, which also |
| /// receives the name of the directive. |
| void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
| // Generator for the body of the 'critical' region. |
| auto &&BodyGen = [&]() -> void { |
| InlinedOpenMPRegionScopeRAII CriticalScope(*this, S); |
| auto *Captured = cast<CapturedStmt>(S.getAssociatedStmt()); |
| EmitStmt(Captured->getCapturedStmt()); |
| EnsureInsertPoint(); |
| }; |
| CGM.getOpenMPRuntime().emitCriticalRegion( |
| *this, S.getDirectiveName().getAsString(), BodyGen, S.getLocStart()); |
| } |
| |
| /// \brief Placeholder for 'omp parallel for': no code is generated yet, the |
| /// body consists only of an llvm_unreachable carrying the diagnostic text. |
| void |
| CodeGenFunction::EmitOMPParallelForDirective(const OMPParallelForDirective &) { |
| llvm_unreachable("CodeGen for 'omp parallel for' is not supported yet."); |
| } |
| |
| /// \brief Placeholder for 'omp parallel for simd': no code is generated yet, |
| /// the body consists only of an llvm_unreachable carrying the diagnostic text. |
| void CodeGenFunction::EmitOMPParallelForSimdDirective( |
| const OMPParallelForSimdDirective &) { |
| llvm_unreachable("CodeGen for 'omp parallel for simd' is not supported yet."); |
| } |
| |
| /// \brief Placeholder for 'omp parallel sections': no code is generated yet, |
| /// the body consists only of an llvm_unreachable carrying the diagnostic text. |
| void CodeGenFunction::EmitOMPParallelSectionsDirective( |
| const OMPParallelSectionsDirective &) { |
| llvm_unreachable("CodeGen for 'omp parallel sections' is not supported yet."); |
| } |
| |
| /// \brief Emits code for the 'omp task' directive: builds the captured-variable |
| /// argument, emits the outlined task function, evaluates the 'untied' and |
| /// 'final' clauses and finally emits the runtime task call. |
| void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
| // Emit outlined function for task construct. |
| auto *Captured = cast<CapturedStmt>(S.getAssociatedStmt()); |
| auto Shareds = GenerateCapturedStmtArgument(*Captured); |
| // The first function argument for tasks is a thread id, the second one is a |
| // part id (0 for tied tasks, >=0 for untied task). |
| auto *ParamIt = Captured->getCapturedDecl()->param_begin(); |
| auto *ThreadIDParam = *ParamIt; |
| auto *PartIDParam = *std::next(ParamIt); |
| auto TaskFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
| S, ThreadIDParam, PartIDParam); |
| // A task is tied unless the 'untied' clause is present. |
| const bool IsTied = S.getSingleClause(OMPC_untied) == nullptr; |
| // 'final' is carried either as a folded constant (int part) or as a value |
| // computed at run time (pointer part). |
| llvm::PointerIntPair<llvm::Value *, 1, bool> FinalFlag; |
| auto *FinalClause = S.getSingleClause(OMPC_final); |
| if (!FinalClause) { |
| // By default the task is not final. |
| FinalFlag.setInt(/*IntVal=*/false); |
| } else { |
| // If the condition constant folds and can be elided, try to avoid emitting |
| // the condition and the dead arm of the if/else. |
| auto *FinalCond = cast<OMPFinalClause>(FinalClause)->getCondition(); |
| bool Folded; |
| if (ConstantFoldsToSimpleInteger(FinalCond, Folded)) |
| FinalFlag.setInt(Folded); |
| else |
| FinalFlag.setPointer(EvaluateExprAsBool(FinalCond)); |
| } |
| auto SharedsType = |
| getContext().getRecordType(Captured->getCapturedRecordDecl()); |
| CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), IsTied, FinalFlag, |
| TaskFn, SharedsType, Shareds); |
| } |
| |
| /// \brief Emits code for the 'omp taskyield' directive by forwarding to the |
| /// runtime's taskyield emitter at the location of the directive. |
| void CodeGenFunction::EmitOMPTaskyieldDirective( |
| const OMPTaskyieldDirective &S) { |
| auto &Runtime = CGM.getOpenMPRuntime(); |
| Runtime.emitTaskyieldCall(*this, S.getLocStart()); |
| } |
| |
| /// \brief Emits code for the 'omp barrier' directive by forwarding to the |
| /// runtime's barrier emitter at the location of the directive. |
| void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
| auto &Runtime = CGM.getOpenMPRuntime(); |
| Runtime.emitBarrierCall(*this, S.getLocStart()); |
| } |
| |
| /// \brief Placeholder for 'omp taskwait': no code is generated yet, the body |
| /// consists only of an llvm_unreachable carrying the diagnostic text. |
| void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) { |
| llvm_unreachable("CodeGen for 'omp taskwait' is not supported yet."); |
| } |
| |
| /// \brief Emits code for the 'omp flush' directive. The variables of the flush |
| /// list, if the directive has one, are passed on to the runtime's flush |
| /// emitter; otherwise an empty list is passed. |
| void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
| // Stays empty when the directive has no flush list. |
| ArrayRef<const Expr *> FlushVars; |
| if (auto *Clause = S.getSingleClause(/*K*/ OMPC_flush)) { |
| auto *ListClause = cast<OMPFlushClause>(Clause); |
| FlushVars = llvm::makeArrayRef(ListClause->varlist_begin(), |
| ListClause->varlist_end()); |
| } |
| CGM.getOpenMPRuntime().emitFlush(*this, FlushVars, S.getLocStart()); |
| } |
| |
| /// \brief Placeholder for 'omp ordered': no code is generated yet, the body |
| /// consists only of an llvm_unreachable carrying the diagnostic text. |
| void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &) { |
| llvm_unreachable("CodeGen for 'omp ordered' is not supported yet."); |
| } |
| |
| /// \brief Converts \a Val of type \a SrcType to a scalar of type \a DestType. |
| /// \a Val must be a scalar or a complex value (not an aggregate) and |
| /// \a DestType must have scalar evaluation kind. |
| static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
| QualType SrcType, QualType DestType) { |
| assert(CGF.hasScalarEvaluationKind(DestType) && |
| "DestType must have scalar evaluation kind."); |
| assert(!Val.isAggregate() && "Must be a scalar or complex."); |
| // Scalar source: plain scalar conversion. |
| if (Val.isScalar()) |
| return CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType); |
| // Complex source: complex-to-scalar conversion. |
| return CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType, |
| DestType); |
| } |
| |
| /// \brief Converts \a Val of type \a SrcType to a complex value of type |
| /// \a DestType, which must have complex evaluation kind. A scalar input |
| /// becomes the first component and the second component is a null value of |
| /// the same LLVM type; a complex input is converted component by component. |
| static CodeGenFunction::ComplexPairTy |
| convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
| QualType DestType) { |
| assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
| "DestType must have complex evaluation kind."); |
| if (!Val.isScalar()) { |
| assert(Val.isComplex() && "Must be a scalar or complex."); |
| auto FromElemTy = SrcType->castAs<ComplexType>()->getElementType(); |
| auto ToElemTy = DestType->castAs<ComplexType>()->getElementType(); |
| // Convert both components from the source to the destination element type. |
| auto *First = CGF.EmitScalarConversion(Val.getComplexVal().first, |
| FromElemTy, ToElemTy); |
| auto *Second = CGF.EmitScalarConversion(Val.getComplexVal().second, |
| FromElemTy, ToElemTy); |
| return CodeGenFunction::ComplexPairTy(First, Second); |
| } |
| // Convert the input element to the element type of the complex. |
| auto ToElemTy = DestType->castAs<ComplexType>()->getElementType(); |
| auto *First = |
| CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, ToElemTy); |
| return CodeGenFunction::ComplexPairTy( |
| First, llvm::Constant::getNullValue(First->getType())); |
| } |
| |
| /// \brief Emits 'v = x' for 'omp atomic read'. \a X is loaded atomically |
| /// (with a plain load when it is a global register lvalue), an implicit flush |
| /// is emitted when \a IsSeqCst is set, and the loaded value is converted to the |
| /// type of \a V and stored there. |
| static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, |
| const Expr *X, const Expr *V, |
| SourceLocation Loc) { |
| // v = x; |
| assert(V->isLValue() && "V of 'omp atomic read' is not lvalue"); |
| assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); |
| LValue XAddr = CGF.EmitLValue(X); |
| LValue VAddr = CGF.EmitLValue(V); |
| // 'seq_cst' selects sequentially consistent ordering, otherwise monotonic. |
| const auto Ordering = |
| IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; |
| RValue Loaded = |
| XAddr.isGlobalReg() |
| ? CGF.EmitLoadOfLValue(XAddr, Loc) |
| : CGF.EmitAtomicLoad(XAddr, Loc, Ordering, XAddr.isVolatile()); |
| // OpenMP, 2.12.6, atomic Construct |
| // Any atomic construct with a seq_cst clause forces the atomically |
| // performed operation to include an implicit flush operation without a |
| // list. |
| if (IsSeqCst) |
| CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); |
| // Store the loaded value into v according to the evaluation kind of v. |
| switch (CGF.getEvaluationKind(V->getType())) { |
| case TEK_Scalar: { |
| auto *Converted = |
| convertToScalarValue(CGF, Loaded, X->getType(), V->getType()); |
| CGF.EmitStoreOfScalar(Converted, VAddr); |
| break; |
| } |
| case TEK_Complex: { |
| auto Converted = |
| convertToComplexValue(CGF, Loaded, X->getType(), V->getType()); |
| CGF.EmitStoreOfComplex(Converted, VAddr, /*isInit=*/false); |
| break; |
| } |
| case TEK_Aggregate: |
| llvm_unreachable("Must be a scalar or complex."); |
| } |
| } |
| |
| /// \brief Emits 'x = expr' for 'omp atomic write'. \a E is evaluated and |
| /// stored to \a X atomically (through a global register store when \a X is a |
| /// global register lvalue); an implicit flush follows when \a IsSeqCst is set. |
| static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, |
| const Expr *X, const Expr *E, |
| SourceLocation Loc) { |
| // x = expr; |
| assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); |
| LValue XAddr = CGF.EmitLValue(X); |
| RValue NewValue = CGF.EmitAnyExpr(E); |
| if (!XAddr.isGlobalReg()) { |
| // 'seq_cst' selects sequentially consistent ordering, otherwise monotonic. |
| const auto Ordering = |
| IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; |
| CGF.EmitAtomicStore(NewValue, XAddr, Ordering, XAddr.isVolatile(), |
| /*IsInit=*/false); |
| } else { |
| CGF.EmitStoreThroughGlobalRegLValue(NewValue, XAddr); |
| } |
| // OpenMP, 2.12.6, atomic Construct |
| // Any atomic construct with a seq_cst clause forces the atomically |
| // performed operation to include an implicit flush operation without a |
| // list. |
| if (IsSeqCst) |
| CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); |
| } |
| |
| /// \brief Dispatches code generation for 'omp atomic' on the clause kind |
| /// \a Kind: 'read' and 'write' are forwarded to their emitters, 'update' and |
| /// 'capture' are not implemented yet, and every other clause kind is rejected |
| /// as not allowed on the directive. |
| static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, |
| bool IsSeqCst, const Expr *X, const Expr *V, |
| const Expr *E, SourceLocation Loc) { |
| switch (Kind) { |
| case OMPC_read: |
| EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); |
| break; |
| case OMPC_write: |
| EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); |
| break; |
| case OMPC_update: |
| case OMPC_capture: |
| llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet."); |
| // All remaining clause kinds are listed explicitly instead of using a |
| // 'default' label. |
| case OMPC_if: |
| case OMPC_final: |
| case OMPC_num_threads: |
| case OMPC_private: |
| case OMPC_firstprivate: |
| case OMPC_lastprivate: |
| case OMPC_reduction: |
| case OMPC_safelen: |
| case OMPC_collapse: |
| case OMPC_default: |
| case OMPC_seq_cst: |
| case OMPC_shared: |
| case OMPC_linear: |
| case OMPC_aligned: |
| case OMPC_copyin: |
| case OMPC_copyprivate: |
| case OMPC_flush: |
| case OMPC_proc_bind: |
| case OMPC_schedule: |
| case OMPC_ordered: |
| case OMPC_nowait: |
| case OMPC_untied: |
| case OMPC_threadprivate: |
| case OMPC_mergeable: |
| case OMPC_unknown: |
| llvm_unreachable("Clause is not allowed in 'omp atomic'."); |
| } |
| } |
| |
| /// \brief Emits code for the 'omp atomic' directive. The kind of the atomic |
| /// operation is the kind of the first clause that is not 'seq_cst' |
| /// (OMPC_unknown when there is none); the operands come from the directive. |
| void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
| const bool HasSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst) != nullptr; |
| OpenMPClauseKind AtomicKind = OMPC_unknown; |
| // Find first clause (skip seq_cst clause, if it is first). |
| for (auto *Clause : S.clauses()) { |
| if (Clause->getClauseKind() == OMPC_seq_cst) |
| continue; |
| AtomicKind = Clause->getClauseKind(); |
| break; |
| } |
| |
| // Enter the full expression before the inlined region scope is opened when |
| // the associated statement is an expression with cleanups. |
| const auto *Body = |
| S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); |
| if (const auto *Cleanups = dyn_cast<ExprWithCleanups>(Body)) |
| enterFullExpression(Cleanups); |
| InlinedOpenMPRegionScopeRAII AtomicScope(*this, S); |
| |
| EmitOMPAtomicExpr(*this, AtomicKind, HasSeqCst, S.getX(), S.getV(), |
| S.getExpr(), S.getLocStart()); |
| } |
| |
| /// \brief Placeholder for 'omp target': no code is generated yet, the body |
| /// consists only of an llvm_unreachable carrying the diagnostic text. |
| void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { |
| llvm_unreachable("CodeGen for 'omp target' is not supported yet."); |
| } |
| |
| /// \brief Placeholder for 'omp teams': no code is generated yet, the body |
| /// consists only of an llvm_unreachable carrying the diagnostic text. |
| void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { |
| llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); |
| } |
| |