[OPENMP50]Codegen for lastprivate conditional list items.
Added codegen support for lastprivate conditional. According to the
standard, when the conditional modifier appears on the clause, if an
assignment to a list item is encountered in the construct then the
original list item is assigned the value that is assigned to the new
list item in the sequentially last iteration or lexically last section
in which such an assignment is encountered.
We look for assignment operations and check whether the left side
references a lastprivate conditional variable. If it does, the following
code is emitted:
if (last_iv_a <= iv) {
last_iv_a = iv;
last_a = lp_a;
}
At the end, an implicit barrier is generated to wait for all threads to
finish; then, in the check for the last iteration, the private copy is
assigned the last value.
if (last_iter) {
lp_a = last_a; // <--- new code
a = lp_a; // <--- store of private value to the original variable.
}
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c729037..e43ed50 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4717,6 +4717,9 @@
if (RV.isScalar())
EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
EmitStoreThroughLValue(RV, LV);
+ if (getLangOpts().OpenMP)
+ CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
+ E->getLHS());
return LV;
}
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp
index 6b11969..f7a4e9e 100644
--- a/clang/lib/CodeGen/CGExprComplex.cpp
+++ b/clang/lib/CodeGen/CGExprComplex.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/StmtVisitor.h"
@@ -1136,7 +1137,11 @@
LValue CodeGenFunction::EmitComplexAssignmentLValue(const BinaryOperator *E) {
assert(E->getOpcode() == BO_Assign);
ComplexPairTy Val; // ignored
- return ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val);
+ LValue LVal = ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val);
+ if (getLangOpts().OpenMP)
+ CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
+ E->getLHS());
+ return LVal;
}
typedef ComplexPairTy (ComplexExprEmitter::*CompoundFunc)(
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 12bf37e..d759d36 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -14,6 +14,7 @@
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
+#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
@@ -2997,6 +2998,9 @@
else
CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV);
+ if (CGF.getLangOpts().OpenMP)
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF,
+ E->getLHS());
return LHSLV;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 59f352d..735cacf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
@@ -11401,6 +11402,268 @@
[VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}
+CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
+ : CGM(CGF.CGM),
+ NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() ==
+ OMPC_LASTPRIVATE_conditional;
+ })) { // NeedToPush: S has at least one lastprivate(conditional:) clause.
+ assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
+ if (!NeedToPush) // Nothing to track for this directive.
+ return;
+ LastprivateConditionalData &Data =
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ if (C->getKind() != OMPC_LASTPRIVATE_conditional) // Skip other kinds.
+ continue;
+
+ for (const Expr *Ref : C->varlists()) { // One generated name per list item.
+ Data.DeclToUniqeName.try_emplace(
+ cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
+ generateUniqueName(CGM, "pl_cond", Ref));
+ }
+ }
+ Data.IVLVal = IVLVal;
+ // In simd-only mode or for simd directives there is no need to generate
+ // threadprivate references for the loop iteration counter; the original one
+ // can be used since outlining cannot happen in simd regions.
+ if (CGF.getLangOpts().OpenMPSimd ||
+ isOpenMPSimdDirective(S.getDirectiveKind())) {
+ Data.UseOriginalIV = true;
+ return;
+ }
+ llvm::SmallString<16> Buffer;
+ llvm::raw_svector_ostream OS(Buffer);
+ PresumedLoc PLoc =
+ CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
+ assert(PLoc.isValid() && "Source location is expected to be always valid.");
+
+ llvm::sys::fs::UniqueID ID;
+ if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
+ CGM.getDiags().Report(diag::err_cannot_open_file)
+ << PLoc.getFilename() << EC.message(); // NOTE(review): ID is still used below after a failure.
+ OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
+ << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
+ Data.IVName = OS.str(); // Name built from file ID, line and column of S.
+
+ // Global loop counter. Required to handle inner parallel-for regions.
+ // global_iv = &iv;
+ QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType());
+ Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, PtrIVTy, Data.IVName);
+ LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, PtrIVTy);
+ CGF.EmitStoreOfScalar(IVLVal.getPointer(CGF), GlobIVLVal);
+}
+
+CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
+ if (!NeedToPush) // The constructor pushed nothing, so there is nothing to pop.
+ return;
+ CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
+}
+
+namespace {
+/// Checks if a lastprivate conditional variable is referenced in an expression.
+class LastprivateConditionalRefChecker final
+ : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
+ CodeGenFunction &CGF;
+ ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
+ const Expr *FoundE = nullptr; // Matched reference expression, if any.
+ const Decl *FoundD = nullptr; // Canonical declaration of the match.
+ StringRef UniqueDeclName; // Unique name recorded for the matched declaration.
+ LValue IVLVal; // Loop counter lvalue of the matching entry.
+ StringRef IVName; // IVName of the matching entry.
+ SourceLocation Loc; // NOTE(review): not read or written anywhere in this class.
+ bool UseOriginalIV = false; // UseOriginalIV flag of the matching entry.
+
+public:
+ bool VisitDeclRefExpr(const DeclRefExpr *E) {
+ for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+ llvm::reverse(LPM)) { // Walk the entries from the last one backwards.
+ auto It = D.DeclToUniqeName.find(E->getDecl());
+ if (It == D.DeclToUniqeName.end())
+ continue;
+ FoundE = E;
+ FoundD = E->getDecl()->getCanonicalDecl();
+ UniqueDeclName = It->getSecond();
+ IVLVal = D.IVLVal;
+ IVName = D.IVName;
+ UseOriginalIV = D.UseOriginalIV;
+ break; // The first matching entry wins.
+ }
+ return FoundE == E;
+ }
+ bool VisitMemberExpr(const MemberExpr *E) {
+ if (!CGF.IsWrappedCXXThis(E->getBase())) // Only members accessed via 'this'.
+ return false;
+ for (const CGOpenMPRuntime::LastprivateConditionalData &D :
+ llvm::reverse(LPM)) { // Walk the entries from the last one backwards.
+ auto It = D.DeclToUniqeName.find(E->getMemberDecl());
+ if (It == D.DeclToUniqeName.end())
+ continue;
+ FoundE = E;
+ FoundD = E->getMemberDecl()->getCanonicalDecl();
+ UniqueDeclName = It->getSecond();
+ IVLVal = D.IVLVal;
+ IVName = D.IVName;
+ UseOriginalIV = D.UseOriginalIV;
+ break; // The first matching entry wins.
+ }
+ return FoundE == E;
+ }
+ bool VisitStmt(const Stmt *S) {
+ for (const Stmt *Child : S->children()) {
+ if (!Child)
+ continue;
+ if (const auto *E = dyn_cast<Expr>(Child))
+ if (!E->isGLValue()) // Child expressions that are not glvalues are skipped.
+ continue;
+ if (Visit(Child)) // Stop at the first child that yields a match.
+ return true;
+ }
+ return false;
+ }
+ explicit LastprivateConditionalRefChecker(
+ CodeGenFunction &CGF,
+ ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
+ : CGF(CGF), LPM(LPM) {}
+ std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
+ getFoundData() const { // Returns the data recorded by the last match.
+ return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
+ UseOriginalIV);
+ }
+};
+} // namespace
+
+void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+ const Expr *LHS) {
+ if (CGF.getLangOpts().OpenMP < 50) // Nothing is emitted below version 50.
+ return;
+ LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
+ if (!Checker.Visit(LHS)) // LHS does not reference a tracked declaration.
+ return;
+ const Expr *FoundE;
+ const Decl *FoundD;
+ StringRef UniqueDeclName;
+ LValue IVLVal;
+ StringRef IVName;
+ bool UseOriginalIV;
+ std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
+ Checker.getFoundData();
+
+ // Last updated loop counter for the lastprivate conditional var.
+ // int<xx> last_iv = 0;
+ llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
+ llvm::Constant *LastIV =
+ getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
+ cast<llvm::GlobalVariable>(LastIV)->setAlignment(
+ IVLVal.getAlignment().getAsAlign());
+ LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
+
+ // Private address of the lastprivate conditional in the current context.
+ // priv_a
+ LValue LVal = CGF.EmitLValue(FoundE);
+ // Last value of the lastprivate conditional.
+ // decltype(priv_a) last_a;
+ llvm::Constant *Last = getOrCreateInternalVariable(
+ LVal.getAddress(CGF).getElementType(), UniqueDeclName);
+ cast<llvm::GlobalVariable>(Last)->setAlignment(
+ LVal.getAlignment().getAsAlign());
+ LValue LastLVal =
+ CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
+
+ // Global loop counter. Required to handle inner parallel-for regions.
+ // global_iv
+ if (!UseOriginalIV) { // Otherwise IVLVal already refers to the counter to use.
+ QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType());
+ Address IVAddr = getAddrOfArtificialThreadPrivate(CGF, PtrIVTy, IVName);
+ IVLVal =
+ CGF.EmitLoadOfPointerLValue(IVAddr, PtrIVTy->castAs<PointerType>());
+ }
+ llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
+
+ // #pragma omp critical(a)
+ // if (last_iv <= iv) {
+ // last_iv = iv;
+ // last_a = priv_a;
+ // }
+ auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
+ FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ llvm::Value *LastIVVal =
+ CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
+ // (last_iv <= global_iv) ? Check if the variable is updated and store new
+ // value in global var.
+ llvm::Value *CmpRes;
+ if (IVLVal.getType()->isSignedIntegerType()) { // Signed compare.
+ CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
+ } else { // Unsigned compare.
+ assert(IVLVal.getType()->isUnsignedIntegerType() &&
+ "Loop iteration variable must be integer.");
+ CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
+ }
+ llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
+ CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
+ // {
+ CGF.EmitBlock(ThenBB);
+
+ // last_iv = global_iv;
+ CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
+
+ // last_a = priv_a;
+ switch (CGF.getEvaluationKind(LVal.getType())) {
+ case TEK_Scalar: {
+ llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
+ CGF.EmitStoreOfScalar(PrivVal, LastLVal);
+ break;
+ }
+ case TEK_Complex: {
+ CodeGenFunction::ComplexPairTy PrivVal =
+ CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
+ CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
+ break;
+ }
+ case TEK_Aggregate:
+ llvm_unreachable(
+ "Aggregates are not supported in lastprivate conditional.");
+ }
+ // }
+ CGF.EmitBranch(ExitBB);
+ // There is no need to emit line number for unconditional branch.
+ (void)ApplyDebugLocation::CreateEmpty(CGF);
+ CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
+ };
+
+ if (CGM.getLangOpts().OpenMPSimd) {
+ // Do not emit as a critical region as no parallel region could be emitted.
+ RegionCodeGenTy ThenRCG(CodeGen);
+ ThenRCG(CGF);
+ } else { // Emit the update inside a critical region named UniqueDeclName.
+ emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
+ }
+}
+
+void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
+ CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
+ SourceLocation Loc) {
+ if (CGF.getLangOpts().OpenMP < 50) // Nothing is emitted below version 50.
+ return;
+ auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD); // Looks only at the last stack entry.
+ assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
+ "Unknown lastprivate conditional variable.");
+ StringRef UniqueName = It->getSecond();
+ llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
+ // The variable was not updated in the region - exit.
+ if (!GV)
+ return;
+ LValue LPLVal = CGF.MakeAddrLValue(
+ GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
+ llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); // NOTE(review): scalar-only copy; the update path also handles TEK_Complex - confirm.
+ CGF.EmitStoreOfScalar(Res, PrivLVal); // priv_a = last_a;
+}
+
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index bcaa06a..2a6a6b9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -222,6 +222,33 @@
~NontemporalDeclsRAII();
};
+ /// Maps the declaration of a lastprivate conditional variable to the name of
+ /// the global copy used to store its new value, because original variables
+ /// are not mapped in inner parallel regions. Only private copies are captured
+ /// but we also need to store the private copy in a shared address.
+ /// Also stores the lvalue of the private loop counter and its
+ /// threadprivate name.
+ struct LastprivateConditionalData {
+ llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>>
+ DeclToUniqeName; // Declaration -> generated unique name.
+ LValue IVLVal; // Lvalue of the loop iteration counter.
+ SmallString<16> IVName; // Name used for the threadprivate counter reference.
+ /// True if original lvalue for loop counter can be used in codegen (simd
+ /// region or simd only mode) and no need to create threadprivate
+ /// references.
+ bool UseOriginalIV = false;
+ };
+ /// Manages the list of lastprivate conditional decls for the given directive.
+ class LastprivateConditionalRAII {
+ CodeGenModule &CGM;
+ const bool NeedToPush; // Set when the directive has a conditional lastprivate.
+
+ public:
+ LastprivateConditionalRAII(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S, LValue IVLVal);
+ ~LastprivateConditionalRAII(); // Pops the entry if one was pushed.
+ };
+
protected:
CodeGenModule &CGM;
StringRef FirstSeparator, Separator;
@@ -666,6 +693,11 @@
/// The set is the union of all current stack elements.
llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
+ /// Stack of data for the declarations marked as lastprivate conditional in
+ /// the current context, one element per enclosing directive that has such a
+ /// clause.
+ llvm::SmallVector<LastprivateConditionalData, 4> LastprivateConditionalStack;
+
/// Flag for keeping track of weather a requires unified_shared_memory
/// directive is present.
bool HasRequiresUnifiedSharedMemory = false;
@@ -1683,6 +1715,31 @@
/// Checks if the \p VD variable is marked as nontemporal declaration in
/// current context.
bool isNontemporalDecl(const ValueDecl *VD) const;
+
+ /// Checks if the provided \p LHS references a lastprivate conditional
+ /// variable and, if so, emits the code that records its latest value.
+ /// \code
+ /// lastprivate(conditional: a)
+ /// ...
+ /// <type> a;
+ /// lp_a = ...;
+ /// #pragma omp critical(a)
+ /// if (last_iv_a <= iv) {
+ /// last_iv_a = iv;
+ /// global_a = lp_a;
+ /// }
+ /// \endcode
+ virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
+ const Expr *LHS);
+
+ /// Copies the value of the global copy used for lastprivate conditional
+ /// update, if any, to the private copy.
+ /// \param PrivLVal LValue for the private copy.
+ /// \param VD Original lastprivate declaration.
+ virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF,
+ LValue PrivLVal,
+ const VarDecl *VD,
+ SourceLocation Loc);
};
/// Class supports emissionof SIMD-only code.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index a38a79b..cac0e7d 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1045,6 +1045,18 @@
llvm::BasicBlock *ThenBB = nullptr;
llvm::BasicBlock *DoneBB = nullptr;
if (IsLastIterCond) {
+ // Emit implicit barrier if at least one lastprivate conditional is found
+ // and this is not a simd mode.
+ if (!getLangOpts().OpenMPSimd &&
+ llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
+ [](const OMPLastprivateClause *C) {
+ return C->getKind() == OMPC_LASTPRIVATE_conditional;
+ })) {
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
+ OMPD_unknown,
+ /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ }
ThenBB = createBasicBlock(".omp.lastprivate.then");
DoneBB = createBasicBlock(".omp.lastprivate.done");
Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
@@ -1083,14 +1095,19 @@
cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
const auto *DestVD =
cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
- // Get the address of the original variable.
- Address OriginalAddr = GetAddrOfLocalVar(DestVD);
// Get the address of the private variable.
Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
PrivateAddr =
Address(Builder.CreateLoad(PrivateAddr),
getNaturalTypeAlignment(RefTy->getPointeeType()));
+ // Store the last value to the private copy in the last iteration.
+ if (C->getKind() == OMPC_LASTPRIVATE_conditional)
+ CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
+ *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
+ (*IRef)->getExprLoc());
+ // Get the address of the original variable.
+ Address OriginalAddr = GetAddrOfLocalVar(DestVD);
EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
}
++IRef;
@@ -1974,6 +1991,8 @@
CGF.EmitOMPLinearClause(S, LoopScope);
CGF.EmitOMPPrivateClause(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
+ CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
+ CGF, S, CGF.EmitLValue(S.getIterationVariable()));
bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
(void)LoopScope.Privatize();
if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
@@ -2546,6 +2565,8 @@
/*ForceSimpleCall=*/true);
}
EmitOMPPrivateClause(S, LoopScope);
+ CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
+ *this, S, EmitLValue(S.getIterationVariable()));
HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPReductionClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
@@ -2856,6 +2877,7 @@
/*ForceSimpleCall=*/true);
}
CGF.EmitOMPPrivateClause(S, LoopScope);
+ CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
CGF.EmitOMPReductionClauseInit(S, LoopScope);
(void)LoopScope.Privatize();