[OpenMP][NVPTX] Use single loops when generating code for distribute parallel for
Summary: This patch adds a new code generation path for bound sharing directives containing distribute parallel for. The new code generation scheme applies to chunked schedules on distribute and parallel for directives. The scheme simplifies the code that is being generated by eliminating the need for an outer for loop over chunks for both distribute and parallel for directives. In the case of distribute it applies to any sized chunk while in the parallel for case it only applies when chunk size is 1.
Reviewers: ABataev, caomhin
Reviewed By: ABataev
Subscribers: jholewinski, guansong, cfe-commits
Differential Revision: https://reviews.llvm.org/D53448
llvm-svn: 345509
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index a6d1a11..e08e776 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -410,7 +410,7 @@
}
return IsDuplicate;
}
-
+
/// Set default data sharing attribute to none.
void setDefaultDSANone(SourceLocation Loc) {
assert(!isStackEmpty());
@@ -5296,6 +5296,12 @@
? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
: SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
NumIterations.get());
+ ExprResult CombDistCond;
+ if (isOpenMPLoopBoundSharingDirective(DKind)) {
+ CombDistCond =
+ SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), NumIterations.get());
+ }
+
ExprResult CombCond;
if (isOpenMPLoopBoundSharingDirective(DKind)) {
CombCond =
@@ -5370,7 +5376,7 @@
// on PrevUB instead of NumIterations - used to implement 'for' when found
// in combination with 'distribute', like in 'distribute parallel for'
SourceLocation DistIncLoc = AStmt->getBeginLoc();
- ExprResult DistCond, DistInc, PrevEUB;
+ ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
if (isOpenMPLoopBoundSharingDirective(DKind)) {
DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5393,6 +5399,11 @@
PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
CondOp.get());
PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+ // Build IV <= PrevUB to be used in parallel for is in combination with
+ // a distribute directive with schedule(static, 1)
+ ParForInDistCond =
+ SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
}
// Build updates and final values of the loop counters.
@@ -5516,6 +5527,8 @@
Built.DistCombinedFields.Cond = CombCond.get();
Built.DistCombinedFields.NLB = CombNextLB.get();
Built.DistCombinedFields.NUB = CombNextUB.get();
+ Built.DistCombinedFields.DistCond = CombDistCond.get();
+ Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get();
return NestedLoopCount;
}