[OPENMP] Fix codegen for ordered loop directives. Loops with an ordered clause must be generated the same way as dynamic loops, but with static scheduling. llvm-svn: 237788

commit: d7589ffe1d9319e1ff36bee1b6d61b44aabbba09 [log] [tgz]
author: Alexey Bataev <a.bataev@hotmail.com> Wed May 20 13:12:48 2015 +0000
committer: Alexey Bataev <a.bataev@hotmail.com> Wed May 20 13:12:48 2015 +0000
tree: 1a67568845a90ac92cb20763065c511a4d08b58e
parent: 15413ea02ba202f2de500059930cc60cf1af9ae4 [diff]
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e96200a..a866525 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp

@@ -1449,51 +1449,61 @@
   OMP_sch_auto = 38,
   /// \brief Lower bound for 'ordered' versions.
   OMP_ord_lower = 64,
-  /// \brief Lower bound for 'nomerge' versions.
-  OMP_nm_lower = 160,
+  OMP_ord_static_chunked = 65,
+  OMP_ord_static = 66,
+  OMP_ord_dynamic_chunked = 67,
+  OMP_ord_guided_chunked = 68,
+  OMP_ord_runtime = 69,
+  OMP_ord_auto = 70,
+  OMP_sch_default = OMP_sch_static,
 };
 
 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
-                                          bool Chunked) {
+                                          bool Chunked, bool Ordered) {
   switch (ScheduleKind) {
   case OMPC_SCHEDULE_static:
-    return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
+                   : (Ordered ? OMP_ord_static : OMP_sch_static);
   case OMPC_SCHEDULE_dynamic:
-    return OMP_sch_dynamic_chunked;
+    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
   case OMPC_SCHEDULE_guided:
-    return OMP_sch_guided_chunked;
-  case OMPC_SCHEDULE_auto:
-    return OMP_sch_auto;
+    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
   case OMPC_SCHEDULE_runtime:
-    return OMP_sch_runtime;
+    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
+  case OMPC_SCHEDULE_auto:
+    return Ordered ? OMP_ord_auto : OMP_sch_auto;
   case OMPC_SCHEDULE_unknown:
     assert(!Chunked && "chunk was specified but schedule kind not known");
-    return OMP_sch_static;
+    return Ordered ? OMP_ord_static : OMP_sch_static;
   }
   llvm_unreachable("Unexpected runtime schedule");
 }
 
 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked) const {
-  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
   return Schedule == OMP_sch_static;
 }
 
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
-  auto Schedule = getRuntimeSchedule(ScheduleKind, /* Chunked */ false);
+  auto Schedule =
+      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
   return Schedule != OMP_sch_static;
 }
 
 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                                   OpenMPScheduleClauseKind ScheduleKind,
-                                  unsigned IVSize, bool IVSigned,
+                                  unsigned IVSize, bool IVSigned, bool Ordered,
                                   llvm::Value *IL, llvm::Value *LB,
                                   llvm::Value *UB, llvm::Value *ST,
                                   llvm::Value *Chunk) {
-  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
-  if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
+  if (Ordered ||
+      (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
+       Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
     // Call __kmpc_dispatch_init(
     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
@@ -1518,12 +1528,13 @@
     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
     if (Chunk == nullptr) {
-      assert(Schedule == OMP_sch_static &&
+      assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
              "expected static non-chunked schedule");
       // If the Chunk was not specified in the clause - use default value 1.
       Chunk = CGF.Builder.getIntN(IVSize, 1);
     } else
-      assert(Schedule == OMP_sch_static_chunked &&
+      assert((Schedule == OMP_sch_static_chunked ||
+              Schedule == OMP_ord_static_chunked) &&
              "expected static chunked schedule");
     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                             getThreadID(CGF, Loc),
@@ -1548,10 +1559,10 @@
                       Args);
 }
 
-void CGOpenMPRuntime::emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
-                                                        SourceLocation Loc,
-                                                        unsigned IVSize,
-                                                        bool IVSigned) {
+void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                                 SourceLocation Loc,
+                                                 unsigned IVSize,
+                                                 bool IVSigned) {
   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
                          getThreadID(CGF, Loc)};

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index bcfe610..f5aa4a5 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h

@@ -439,6 +439,7 @@
   /// \param SchedKind Schedule kind, specified by the 'schedule' clause.
   /// \param IVSize Size of the iteration variable in bits.
   /// \param IVSigned Sign of the interation variable.
+  /// \param Ordered true if loop is ordered, false otherwise.
   /// \param IL Address of the output variable in which the flag of the
   /// last iteration is returned.
   /// \param LB Address of the output variable in which the lower iteration
@@ -452,8 +453,8 @@
   ///
   virtual void emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                            OpenMPScheduleClauseKind SchedKind, unsigned IVSize,
-                           bool IVSigned, llvm::Value *IL, llvm::Value *LB,
-                           llvm::Value *UB, llvm::Value *ST,
+                           bool IVSigned, bool Ordered, llvm::Value *IL,
+                           llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
                            llvm::Value *Chunk = nullptr);
 
   /// \brief Call the appropriate runtime routine to notify that we finished
@@ -464,10 +465,9 @@
   /// \param IVSize Size of the iteration variable in bits.
   /// \param IVSigned Sign of the interation variable.
   ///
-  virtual void emitForOrderedDynamicIterationEnd(CodeGenFunction &CGF,
-                                                 SourceLocation Loc,
-                                                 unsigned IVSize,
-                                                 bool IVSigned);
+  virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF,
+                                          SourceLocation Loc, unsigned IVSize,
+                                          bool IVSigned);
 
   /// \brief Call the appropriate runtime routine to notify that we finished
   /// all the work with current loop.

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 61945c6..f04a293 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp

@@ -803,15 +803,16 @@
 void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                           const OMPLoopDirective &S,
                                           OMPPrivateScope &LoopScope,
-                                          llvm::Value *LB, llvm::Value *UB,
-                                          llvm::Value *ST, llvm::Value *IL,
-                                          llvm::Value *Chunk) {
+                                          bool Ordered, llvm::Value *LB,
+                                          llvm::Value *UB, llvm::Value *ST,
+                                          llvm::Value *IL, llvm::Value *Chunk) {
   auto &RT = CGM.getOpenMPRuntime();
 
   // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
-  const bool Dynamic = RT.isDynamic(ScheduleKind);
+  const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind);
 
-  assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+  assert((Ordered ||
+          !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) &&
          "static non-chunked schedule does not need outer loop");
 
   // Emit outer loop.
@@ -869,9 +870,10 @@
   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
 
   RT.emitForInit(
-      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
-      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
-      Chunk);
+      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, Ordered, IL, LB,
+      (DynamicOrOrdered ? EmitAnyExpr(S.getLastIteration()).getScalarVal()
+                        : UB),
+      ST, Chunk);
 
   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
 
@@ -881,7 +883,7 @@
   LoopStack.push(CondBlock);
 
   llvm::Value *BoolCondVal = nullptr;
-  if (!Dynamic) {
+  if (!DynamicOrOrdered) {
     // UB = min(UB, GlobalUB)
     EmitIgnoredExpr(S.getEnsureUpperBound());
     // IV = LB
@@ -909,21 +911,19 @@
 
   // Emit "IV = LB" (in case of static schedule, we have already calculated new
   // LB for loop condition and emitted it above).
-  if (Dynamic)
+  if (DynamicOrOrdered)
     EmitIgnoredExpr(S.getInit());
 
   // Create a block for the increment.
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  bool DynamicWithOrderedClause =
-      Dynamic && S.getSingleClause(OMPC_ordered) != nullptr;
   SourceLocation Loc = S.getLocStart();
   // Generate !llvm.loop.parallel metadata for loads and stores for loops with
   // dynamic/guided scheduling and without ordered clause.
   LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
                          ScheduleKind == OMPC_SCHEDULE_guided) &&
-                        !DynamicWithOrderedClause);
+                        !Ordered);
   EmitOMPInnerLoop(
       S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
       S.getInc(),
@@ -931,16 +931,16 @@
         CGF.EmitOMPLoopBody(S);
         CGF.EmitStopPoint(&S);
       },
-      [DynamicWithOrderedClause, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
-        if (DynamicWithOrderedClause) {
-          CGF.CGM.getOpenMPRuntime().emitForOrderedDynamicIterationEnd(
+      [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
+        if (Ordered) {
+          CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
               CGF, Loc, IVSize, IVSigned);
         }
       });
 
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
-  if (!Dynamic) {
+  if (!DynamicOrOrdered) {
     // Emit "LB = LB + Stride", "UB = UB + Stride".
     EmitIgnoredExpr(S.getNextLowerBound());
     EmitIgnoredExpr(S.getNextUpperBound());
@@ -952,7 +952,7 @@
   EmitBlock(LoopExit.getBlock());
 
   // Tell the runtime we are done.
-  if (!Dynamic)
+  if (!DynamicOrOrdered)
     RT.emitForStaticFinish(*this, S.getLocEnd());
 }
 
@@ -1066,16 +1066,18 @@
       ScheduleKind = ScheduleInfo.second;
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+      const bool Ordered = S.getSingleClause(OMPC_ordered) != nullptr;
       if (RT.isStaticNonchunked(ScheduleKind,
-                                /* Chunked */ Chunk != nullptr)) {
+                                /* Chunked */ Chunk != nullptr) &&
+          !Ordered) {
         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
         // When no chunk_size is specified, the iteration space is divided into
         // chunks that are approximately equal in size, and at most one chunk is
         // distributed to each thread. Note that the size of the chunks is
         // unspecified in this case.
         RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
-                       IL.getAddress(), LB.getAddress(), UB.getAddress(),
-                       ST.getAddress());
+                       Ordered, IL.getAddress(), LB.getAddress(),
+                       UB.getAddress(), ST.getAddress());
         // UB = min(UB, GlobalUB);
         EmitIgnoredExpr(S.getEnsureUpperBound());
         // IV = LB;
@@ -1093,9 +1095,9 @@
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
-        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(),
-                            UB.getAddress(), ST.getAddress(), IL.getAddress(),
-                            Chunk);
+        EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, Ordered,
+                            LB.getAddress(), UB.getAddress(), ST.getAddress(),
+                            IL.getAddress(), Chunk);
       }
       EmitOMPReductionClauseFinal(S);
       // Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -1213,8 +1215,8 @@
       // Emit static non-chunked loop.
       CGF.CGM.getOpenMPRuntime().emitForInit(
           CGF, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
-          /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
-          ST.getAddress());
+          /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
+          LB.getAddress(), UB.getAddress(), ST.getAddress());
       // UB = min(UB, GlobalUB);
       auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
       auto *MinUBGlobalUB = CGF.Builder.CreateSelect(

diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index be134bc..650ad7b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h

@@ -2198,9 +2198,9 @@
   bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                            const OMPLoopDirective &S,
-                           OMPPrivateScope &LoopScope, llvm::Value *LB,
-                           llvm::Value *UB, llvm::Value *ST, llvm::Value *IL,
-                           llvm::Value *Chunk);
+                           OMPPrivateScope &LoopScope, bool Ordered,
+                           llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
+                           llvm::Value *IL, llvm::Value *Chunk);
 
 public:
commit	d7589ffe1d9319e1ff36bee1b6d61b44aabbba09	[log] [tgz]
author	Alexey Bataev <a.bataev@hotmail.com>	Wed May 20 13:12:48 2015 +0000
committer	Alexey Bataev <a.bataev@hotmail.com>	Wed May 20 13:12:48 2015 +0000
tree	1a67568845a90ac92cb20763065c511a4d08b58e
parent	15413ea02ba202f2de500059930cc60cf1af9ae4 [diff]