[OPENMP] Loop collapsing and codegen for 'omp simd' directive.

This patch implements collapsing of the loops (in particular, in
presense of clause 'collapse'). It calculates number of iterations N
and expressions nesessary to calculate the nested loops counters
values based on new iteration variable (that goes from 0 to N-1)
in Sema. It also adds Codegen for 'omp simd', which uses
(and tests) this feature.

Differential Revision: http://reviews.llvm.org/D5184

llvm-svn: 218743
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index d862743..e70090d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1795,8 +1795,12 @@
   SourceLocation DefaultLoc;
   /// \brief A location for diagnostics (when increment is not compatible).
   SourceLocation ConditionLoc;
+  /// \brief A source location for referring to loop init later.
+  SourceRange InitSrcRange;
   /// \brief A source location for referring to condition later.
   SourceRange ConditionSrcRange;
+  /// \brief A source location for referring to increment later.
+  SourceRange IncrementSrcRange;
   /// \brief Loop variable.
   VarDecl *Var;
   /// \brief Reference to loop variable.
@@ -1821,7 +1825,8 @@
 public:
   OpenMPIterationSpaceChecker(Sema &SemaRef, SourceLocation DefaultLoc)
       : SemaRef(SemaRef), DefaultLoc(DefaultLoc), ConditionLoc(DefaultLoc),
-        ConditionSrcRange(SourceRange()), Var(nullptr), VarRef(nullptr),
+        InitSrcRange(SourceRange()), ConditionSrcRange(SourceRange()),
+        IncrementSrcRange(SourceRange()), Var(nullptr), VarRef(nullptr),
         LB(nullptr), UB(nullptr), Step(nullptr), TestIsLessOp(false),
         TestIsStrictOp(false), SubtractStep(false) {}
   /// \brief Check init-expr for canonical loop form and save loop counter
@@ -1837,6 +1842,22 @@
   VarDecl *GetLoopVar() const { return Var; }
   /// \brief Return the reference expression to loop counter variable.
   DeclRefExpr *GetLoopVarRefExpr() const { return VarRef; }
+  /// \brief Source range of the loop init.
+  SourceRange GetInitSrcRange() const { return InitSrcRange; }
+  /// \brief Source range of the loop condition.
+  SourceRange GetConditionSrcRange() const { return ConditionSrcRange; }
+  /// \brief Source range of the loop increment.
+  SourceRange GetIncrementSrcRange() const { return IncrementSrcRange; }
+  /// \brief True if the step should be subtracted.
+  bool ShouldSubtractStep() const { return SubtractStep; }
+  /// \brief Build the expression to calculate the number of iterations.
+  Expr *BuildNumIterations(Scope *S) const;
+  /// \brief Build reference expression to the counter be used for codegen.
+  Expr *BuildCounterVar() const;
+  /// \brief Build initization of the counter be used for codegen.
+  Expr *BuildCounterInit() const;
+  /// \brief Build step of the counter be used for codegen.
+  Expr *BuildCounterStep() const;
   /// \brief Return true if any expression is dependent.
   bool Dependent() const;
 
@@ -1922,10 +1943,12 @@
     bool IsUnsigned = !NewStep->getType()->hasSignedIntegerRepresentation();
     bool IsConstNeg =
         IsConstant && Result.isSigned() && (Subtract != Result.isNegative());
+    bool IsConstPos =
+        IsConstant && Result.isSigned() && (Subtract == Result.isNegative());
     bool IsConstZero = IsConstant && !Result.getBoolValue();
     if (UB && (IsConstZero ||
                (TestIsLessOp ? (IsConstNeg || (IsUnsigned && Subtract))
-                             : (!IsConstNeg || (IsUnsigned && !Subtract))))) {
+                             : (IsConstPos || (IsUnsigned && !Subtract))))) {
       SemaRef.Diag(NewStep->getExprLoc(),
                    diag::err_omp_loop_incr_not_compatible)
           << Var << TestIsLessOp << NewStep->getSourceRange();
@@ -1934,6 +1957,11 @@
           << TestIsLessOp << ConditionSrcRange;
       return true;
     }
+    if (TestIsLessOp == Subtract) {
+      NewStep = SemaRef.CreateBuiltinUnaryOp(NewStep->getExprLoc(), UO_Minus,
+                                             NewStep).get();
+      Subtract = !Subtract;
+    }
   }
 
   Step = NewStep;
@@ -1954,13 +1982,14 @@
     SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_init);
     return true;
   }
+  InitSrcRange = S->getSourceRange();
   if (Expr *E = dyn_cast<Expr>(S))
     S = E->IgnoreParens();
   if (auto BO = dyn_cast<BinaryOperator>(S)) {
     if (BO->getOpcode() == BO_Assign)
       if (auto DRE = dyn_cast<DeclRefExpr>(BO->getLHS()->IgnoreParens()))
         return SetVarAndLB(dyn_cast<VarDecl>(DRE->getDecl()), DRE,
-                           BO->getLHS());
+                           BO->getRHS());
   } else if (auto DS = dyn_cast<DeclStmt>(S)) {
     if (DS->isSingleDecl()) {
       if (auto Var = dyn_cast_or_null<VarDecl>(DS->getSingleDecl())) {
@@ -2102,6 +2131,7 @@
     SemaRef.Diag(DefaultLoc, diag::err_omp_loop_not_canonical_incr) << Var;
     return true;
   }
+  IncrementSrcRange = S->getSourceRange();
   S = S->IgnoreParens();
   if (auto UO = dyn_cast<UnaryOperator>(S)) {
     if (UO->isIncrementDecrementOp() && GetInitVarDecl(UO->getSubExpr()) == Var)
@@ -2151,6 +2181,133 @@
       << S->getSourceRange() << Var;
   return true;
 }
+
+/// \brief Build the expression to calculate the number of iterations.
+Expr *OpenMPIterationSpaceChecker::BuildNumIterations(Scope *S) const {
+  ExprResult Diff;
+  if (Var->getType()->isIntegerType() || Var->getType()->isPointerType() ||
+      SemaRef.getLangOpts().CPlusPlus) {
+    // Upper - Lower
+    Expr *Upper = TestIsLessOp ? UB : LB;
+    Expr *Lower = TestIsLessOp ? LB : UB;
+
+    Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Sub, Upper, Lower);
+
+    if (!Diff.isUsable() && Var->getType()->getAsCXXRecordDecl()) {
+      // BuildBinOp already emitted error, this one is to point user to upper
+      // and lower bound, and to tell what is passed to 'operator-'.
+      SemaRef.Diag(Upper->getLocStart(), diag::err_omp_loop_diff_cxx)
+          << Upper->getSourceRange() << Lower->getSourceRange();
+      return nullptr;
+    }
+  }
+
+  if (!Diff.isUsable())
+    return nullptr;
+
+  // Upper - Lower [- 1]
+  if (TestIsStrictOp)
+    Diff = SemaRef.BuildBinOp(
+        S, DefaultLoc, BO_Sub, Diff.get(),
+        SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get());
+  if (!Diff.isUsable())
+    return nullptr;
+
+  // Upper - Lower [- 1] + Step
+  Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Add, Diff.get(),
+                            Step->IgnoreImplicit());
+  if (!Diff.isUsable())
+    return nullptr;
+
+  // Parentheses (for dumping/debugging purposes only).
+  Diff = SemaRef.ActOnParenExpr(DefaultLoc, DefaultLoc, Diff.get());
+  if (!Diff.isUsable())
+    return nullptr;
+
+  // (Upper - Lower [- 1] + Step) / Step
+  Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Div, Diff.get(),
+                            Step->IgnoreImplicit());
+  if (!Diff.isUsable())
+    return nullptr;
+
+  return Diff.get();
+}
+
+/// \brief Build reference expression to the counter be used for codegen.
+Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const {
+  return DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
+                             GetIncrementSrcRange().getBegin(), Var, false,
+                             DefaultLoc, Var->getType(), VK_LValue);
+}
+
+/// \brief Build initization of the counter be used for codegen.
+Expr *OpenMPIterationSpaceChecker::BuildCounterInit() const { return LB; }
+
+/// \brief Build step of the counter be used for codegen.
+Expr *OpenMPIterationSpaceChecker::BuildCounterStep() const { return Step; }
+
+/// \brief Iteration space of a single for loop.
+struct LoopIterationSpace {
+  /// \brief This expression calculates the number of iterations in the loop.
+  /// It is always possible to calculate it before starting the loop.
+  Expr *NumIterations;
+  /// \brief The loop counter variable.
+  Expr *CounterVar;
+  /// \brief This is initializer for the initial value of #CounterVar.
+  Expr *CounterInit;
+  /// \brief This is step for the #CounterVar used to generate its update:
+  /// #CounterVar = #CounterInit + #CounterStep * CurrentIteration.
+  Expr *CounterStep;
+  /// \brief Should step be subtracted?
+  bool Subtract;
+  /// \brief Source range of the loop init.
+  SourceRange InitSrcRange;
+  /// \brief Source range of the loop condition.
+  SourceRange CondSrcRange;
+  /// \brief Source range of the loop increment.
+  SourceRange IncSrcRange;
+};
+
+/// \brief The resulting expressions built for the OpenMP loop CodeGen for the
+/// whole collapsed loop nest. See class OMPLoopDirective for their description.
+struct BuiltLoopExprs {
+  Expr *IterationVarRef;
+  Expr *LastIteration;
+  Expr *CalcLastIteration;
+  Expr *PreCond;
+  Expr *Cond;
+  Expr *SeparatedCond;
+  Expr *Init;
+  Expr *Inc;
+  SmallVector<Expr *, 4> Counters;
+  SmallVector<Expr *, 4> Updates;
+  SmallVector<Expr *, 4> Finals;
+
+  bool builtAll() {
+    return IterationVarRef != nullptr && LastIteration != nullptr &&
+           PreCond != nullptr && Cond != nullptr && SeparatedCond != nullptr &&
+           Init != nullptr && Inc != nullptr;
+  }
+  void clear(unsigned size) {
+    IterationVarRef = nullptr;
+    LastIteration = nullptr;
+    CalcLastIteration = nullptr;
+    PreCond = nullptr;
+    Cond = nullptr;
+    SeparatedCond = nullptr;
+    Init = nullptr;
+    Inc = nullptr;
+    Counters.resize(size);
+    Updates.resize(size);
+    Finals.resize(size);
+    for (unsigned i = 0; i < size; ++i) {
+      Counters[i] = nullptr;
+      Updates[i] = nullptr;
+      Finals[i] = nullptr;
+    }
+  }
+};
+
 } // namespace
 
 /// \brief Called on a for stmt to check and extract its iteration space
@@ -2159,7 +2316,8 @@
     OpenMPDirectiveKind DKind, Stmt *S, Sema &SemaRef, DSAStackTy &DSA,
     unsigned CurrentNestedLoopCount, unsigned NestedLoopCount,
     Expr *NestedLoopCountExpr,
-    llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+    llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA,
+    LoopIterationSpace &ResultIterSpace) {
   // OpenMP [2.6, Canonical Loop Form]
   //   for (init-expr; test-expr; incr-expr) structured-block
   auto For = dyn_cast_or_null<ForStmt>(S);
@@ -2256,35 +2414,96 @@
   // Check incr-expr.
   HasErrors |= ISC.CheckInc(For->getInc());
 
-  if (ISC.Dependent())
+  if (ISC.Dependent() || SemaRef.CurContext->isDependentContext() || HasErrors)
     return HasErrors;
 
-  // FIXME: Build loop's iteration space representation.
+  // Build the loop's iteration space representation.
+  ResultIterSpace.NumIterations = ISC.BuildNumIterations(DSA.getCurScope());
+  ResultIterSpace.CounterVar = ISC.BuildCounterVar();
+  ResultIterSpace.CounterInit = ISC.BuildCounterInit();
+  ResultIterSpace.CounterStep = ISC.BuildCounterStep();
+  ResultIterSpace.InitSrcRange = ISC.GetInitSrcRange();
+  ResultIterSpace.CondSrcRange = ISC.GetConditionSrcRange();
+  ResultIterSpace.IncSrcRange = ISC.GetIncrementSrcRange();
+  ResultIterSpace.Subtract = ISC.ShouldSubtractStep();
+
+  HasErrors |= (ResultIterSpace.NumIterations == nullptr ||
+                ResultIterSpace.CounterVar == nullptr ||
+                ResultIterSpace.CounterInit == nullptr ||
+                ResultIterSpace.CounterStep == nullptr);
+
   return HasErrors;
 }
 
-/// \brief A helper routine to skip no-op (attributed, compound) stmts get the
-/// next nested for loop. If \a IgnoreCaptured is true, it skips captured stmt
-/// to get the first for loop.
-static Stmt *IgnoreContainerStmts(Stmt *S, bool IgnoreCaptured) {
-  if (IgnoreCaptured)
-    if (auto CapS = dyn_cast_or_null<CapturedStmt>(S))
-      S = CapS->getCapturedStmt();
-  // OpenMP [2.8.1, simd construct, Restrictions]
-  // All loops associated with the construct must be perfectly nested; that is,
-  // there must be no intervening code nor any OpenMP directive between any two
-  // loops.
-  while (true) {
-    if (auto AS = dyn_cast_or_null<AttributedStmt>(S))
-      S = AS->getSubStmt();
-    else if (auto CS = dyn_cast_or_null<CompoundStmt>(S)) {
-      if (CS->size() != 1)
-        break;
-      S = CS->body_back();
-    } else
-      break;
-  }
-  return S;
+/// \brief Build a variable declaration for OpenMP loop iteration variable.
+static VarDecl *BuildVarDecl(Sema &SemaRef, SourceLocation Loc, QualType Type,
+                             StringRef Name) {
+  DeclContext *DC = SemaRef.CurContext;
+  IdentifierInfo *II = &SemaRef.PP.getIdentifierTable().get(Name);
+  TypeSourceInfo *TInfo = SemaRef.Context.getTrivialTypeSourceInfo(Type, Loc);
+  VarDecl *Decl =
+      VarDecl::Create(SemaRef.Context, DC, Loc, Loc, II, Type, TInfo, SC_None);
+  Decl->setImplicit();
+  return Decl;
+}
+
+/// \brief Build 'VarRef = Start + Iter * Step'.
+static ExprResult BuildCounterUpdate(Sema &SemaRef, Scope *S,
+                                     SourceLocation Loc, ExprResult VarRef,
+                                     ExprResult Start, ExprResult Iter,
+                                     ExprResult Step, bool Subtract) {
+  // Add parentheses (for debugging purposes only).
+  Iter = SemaRef.ActOnParenExpr(Loc, Loc, Iter.get());
+  if (!VarRef.isUsable() || !Start.isUsable() || !Iter.isUsable() ||
+      !Step.isUsable())
+    return ExprError();
+
+  ExprResult Update = SemaRef.BuildBinOp(S, Loc, BO_Mul, Iter.get(),
+                                         Step.get()->IgnoreImplicit());
+  if (!Update.isUsable())
+    return ExprError();
+
+  // Build 'VarRef = Start + Iter * Step'.
+  Update = SemaRef.BuildBinOp(S, Loc, (Subtract ? BO_Sub : BO_Add),
+                              Start.get()->IgnoreImplicit(), Update.get());
+  if (!Update.isUsable())
+    return ExprError();
+
+  Update = SemaRef.PerformImplicitConversion(
+      Update.get(), VarRef.get()->getType(), Sema::AA_Converting, true);
+  if (!Update.isUsable())
+    return ExprError();
+
+  Update = SemaRef.BuildBinOp(S, Loc, BO_Assign, VarRef.get(), Update.get());
+  return Update;
+}
+
+/// \brief Convert integer expression \a E to make it have at least \a Bits
+/// bits.
+static ExprResult WidenIterationCount(unsigned Bits, Expr *E,
+                                      Sema &SemaRef) {
+  if (E == nullptr)
+    return ExprError();
+  auto &C = SemaRef.Context;
+  QualType OldType = E->getType();
+  unsigned HasBits = C.getTypeSize(OldType);
+  if (HasBits >= Bits)
+    return ExprResult(E);
+  // OK to convert to signed, because new type has more bits than old.
+  QualType NewType = C.getIntTypeForBitwidth(Bits, /* Signed */ true);
+  return SemaRef.PerformImplicitConversion(E, NewType, Sema::AA_Converting,
+                                           true);
+}
+
+/// \brief Check if the given expression \a E is a constant integer that fits
+/// into \a Bits bits.
+static bool FitsInto(unsigned Bits, bool Signed, Expr *E, Sema &SemaRef) {
+  if (E == nullptr)
+    return false;
+  llvm::APSInt Result;
+  if (E->isIntegerConstantExpr(Result, SemaRef.Context))
+    return Signed ? Result.isSignedIntN(Bits) : Result.isIntN(Bits);
+  return false;
 }
 
 /// \brief Called on a for stmt to check itself and nested loops (if any).
@@ -2293,7 +2512,8 @@
 static unsigned
 CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *NestedLoopCountExpr,
                 Stmt *AStmt, Sema &SemaRef, DSAStackTy &DSA,
-                llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+                llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA,
+                BuiltLoopExprs &Built) {
   unsigned NestedLoopCount = 1;
   if (NestedLoopCountExpr) {
     // Found 'collapse' clause - calculate collapse number.
@@ -2303,18 +2523,252 @@
   }
   // This is helper routine for loop directives (e.g., 'for', 'simd',
   // 'for simd', etc.).
-  Stmt *CurStmt = IgnoreContainerStmts(AStmt, true);
+  SmallVector<LoopIterationSpace, 4> IterSpaces;
+  IterSpaces.resize(NestedLoopCount);
+  Stmt *CurStmt = AStmt->IgnoreContainers(/* IgnoreCaptured */ true);
   for (unsigned Cnt = 0; Cnt < NestedLoopCount; ++Cnt) {
     if (CheckOpenMPIterationSpace(DKind, CurStmt, SemaRef, DSA, Cnt,
                                   NestedLoopCount, NestedLoopCountExpr,
-                                  VarsWithImplicitDSA))
+                                  VarsWithImplicitDSA, IterSpaces[Cnt]))
       return 0;
     // Move on to the next nested for loop, or to the loop body.
-    CurStmt = IgnoreContainerStmts(cast<ForStmt>(CurStmt)->getBody(), false);
+    // OpenMP [2.8.1, simd construct, Restrictions]
+    // All loops associated with the construct must be perfectly nested; that
+    // is, there must be no intervening code nor any OpenMP directive between
+    // any two loops.
+    CurStmt = cast<ForStmt>(CurStmt)->getBody()->IgnoreContainers();
   }
 
-  // FIXME: Build resulting iteration space for IR generation (collapsing
-  // iteration spaces when loop count > 1 ('collapse' clause)).
+  Built.clear(/* size */ NestedLoopCount);
+
+  if (SemaRef.CurContext->isDependentContext())
+    return NestedLoopCount;
+
+  // An example of what is generated for the following code:
+  //
+  //   #pragma omp simd collapse(2)
+  //   for (i = 0; i < NI; ++i)
+  //     for (j = J0; j < NJ; j+=2) {
+  //     <loop body>
+  //   }
+  //
+  // We generate the code below.
+  // Note: the loop body may be outlined in CodeGen.
+  // Note: some counters may be C++ classes, operator- is used to find number of
+  // iterations and operator+= to calculate counter value.
+  // Note: decltype(NumIterations) must be integer type (in 'omp for', only i32
+  // or i64 is currently supported).
+  //
+  //   #define NumIterations (NI * ((NJ - J0 - 1 + 2) / 2))
+  //   for (int[32|64]_t IV = 0; IV < NumIterations; ++IV ) {
+  //     .local.i = IV / ((NJ - J0 - 1 + 2) / 2);
+  //     .local.j = J0 + (IV % ((NJ - J0 - 1 + 2) / 2)) * 2;
+  //     // similar updates for vars in clauses (e.g. 'linear')
+  //     <loop body (using local i and j)>
+  //   }
+  //   i = NI; // assign final values of counters
+  //   j = NJ;
+  //
+
+  // Last iteration number is (I1 * I2 * ... In) - 1, where I1, I2 ... In are
+  // the iteration counts of the collapsed for loops.
+  auto N0 = IterSpaces[0].NumIterations;
+  ExprResult LastIteration32 = WidenIterationCount(32 /* Bits */, N0, SemaRef);
+  ExprResult LastIteration64 = WidenIterationCount(64 /* Bits */, N0, SemaRef);
+
+  if (!LastIteration32.isUsable() || !LastIteration64.isUsable())
+    return NestedLoopCount;
+
+  auto &C = SemaRef.Context;
+  bool AllCountsNeedLessThan32Bits = C.getTypeSize(N0->getType()) < 32;
+
+  Scope *CurScope = DSA.getCurScope();
+  for (unsigned Cnt = 1; Cnt < NestedLoopCount; ++Cnt) {
+    auto N = IterSpaces[Cnt].NumIterations;
+    AllCountsNeedLessThan32Bits &= C.getTypeSize(N->getType()) < 32;
+    if (LastIteration32.isUsable())
+      LastIteration32 = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+                                           LastIteration32.get(), N);
+    if (LastIteration64.isUsable())
+      LastIteration64 = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_Mul,
+                                           LastIteration64.get(), N);
+  }
+
+  // Choose either the 32-bit or 64-bit version.
+  ExprResult LastIteration = LastIteration64;
+  if (LastIteration32.isUsable() &&
+      C.getTypeSize(LastIteration32.get()->getType()) == 32 &&
+      (AllCountsNeedLessThan32Bits || NestedLoopCount == 1 ||
+       FitsInto(
+           32 /* Bits */,
+           LastIteration32.get()->getType()->hasSignedIntegerRepresentation(),
+           LastIteration64.get(), SemaRef)))
+    LastIteration = LastIteration32;
+
+  if (!LastIteration.isUsable())
+    return 0;
+
+  // Save the number of iterations.
+  ExprResult NumIterations = LastIteration;
+  {
+    LastIteration = SemaRef.BuildBinOp(
+        CurScope, SourceLocation(), BO_Sub, LastIteration.get(),
+        SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get());
+    if (!LastIteration.isUsable())
+      return 0;
+  }
+
+  // Calculate the last iteration number beforehand instead of doing this on
+  // each iteration. Do not do this if the number of iterations may be kfold-ed.
+  llvm::APSInt Result;
+  bool IsConstant =
+      LastIteration.get()->isIntegerConstantExpr(Result, SemaRef.Context);
+  ExprResult CalcLastIteration;
+  if (!IsConstant) {
+    SourceLocation SaveLoc;
+    VarDecl *SaveVar =
+        BuildVarDecl(SemaRef, SaveLoc, LastIteration.get()->getType(),
+                     ".omp.last.iteration");
+    ExprResult SaveRef = SemaRef.BuildDeclRefExpr(
+        SaveVar, LastIteration.get()->getType(), VK_LValue, SaveLoc);
+    CalcLastIteration = SemaRef.BuildBinOp(CurScope, SaveLoc, BO_Assign,
+                                           SaveRef.get(), LastIteration.get());
+    LastIteration = SaveRef;
+
+    // Prepare SaveRef + 1.
+    NumIterations = SemaRef.BuildBinOp(
+        CurScope, SaveLoc, BO_Add, SaveRef.get(),
+        SemaRef.ActOnIntegerConstant(SourceLocation(), 1).get());
+    if (!NumIterations.isUsable())
+      return 0;
+  }
+
+  SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
+
+  // Precondition tests if there is at least one iteration (LastIteration > 0).
+  ExprResult PreCond = SemaRef.BuildBinOp(
+      CurScope, InitLoc, BO_GT, LastIteration.get(),
+      SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
+
+  // Build the iteration variable and its initialization to zero before loop.
+  ExprResult IV;
+  ExprResult Init;
+  {
+    VarDecl *IVDecl = BuildVarDecl(SemaRef, InitLoc,
+                                   LastIteration.get()->getType(), ".omp.iv");
+    IV = SemaRef.BuildDeclRefExpr(IVDecl, LastIteration.get()->getType(),
+                                  VK_LValue, InitLoc);
+    Init = SemaRef.BuildBinOp(
+        CurScope, InitLoc, BO_Assign, IV.get(),
+        SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
+  }
+
+  // Loop condition (IV < NumIterations)
+  SourceLocation CondLoc;
+  ExprResult Cond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
+                                       NumIterations.get());
+  // Loop condition with 1 iteration separated (IV < LastIteration)
+  ExprResult SeparatedCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT,
+                                                IV.get(), LastIteration.get());
+
+  // Loop increment (IV = IV + 1)
+  SourceLocation IncLoc;
+  ExprResult Inc =
+      SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, IV.get(),
+                         SemaRef.ActOnIntegerConstant(IncLoc, 1).get());
+  if (!Inc.isUsable())
+    return 0;
+  Inc = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, IV.get(), Inc.get());
+
+  // Build updates and final values of the loop counters.
+  bool HasErrors = false;
+  Built.Counters.resize(NestedLoopCount);
+  Built.Updates.resize(NestedLoopCount);
+  Built.Finals.resize(NestedLoopCount);
+  {
+    ExprResult Div;
+    // Go from inner nested loop to outer.
+    for (int Cnt = NestedLoopCount - 1; Cnt >= 0; --Cnt) {
+      LoopIterationSpace &IS = IterSpaces[Cnt];
+      SourceLocation UpdLoc = IS.IncSrcRange.getBegin();
+      // Build: Iter = (IV / Div) % IS.NumIters
+      // where Div is product of previous iterations' IS.NumIters.
+      ExprResult Iter;
+      if (Div.isUsable()) {
+        Iter =
+            SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Div, IV.get(), Div.get());
+      } else {
+        Iter = IV;
+        assert((Cnt == (int)NestedLoopCount - 1) &&
+               "unusable div expected on first iteration only");
+      }
+
+      if (Cnt != 0 && Iter.isUsable())
+        Iter = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Rem, Iter.get(),
+                                  IS.NumIterations);
+      if (!Iter.isUsable()) {
+        HasErrors = true;
+        break;
+      }
+
+      // Build update: IS.CounterVar = IS.Start + Iter * IS.Step
+      ExprResult Update =
+          BuildCounterUpdate(SemaRef, CurScope, UpdLoc, IS.CounterVar,
+                             IS.CounterInit, Iter, IS.CounterStep, IS.Subtract);
+      if (!Update.isUsable()) {
+        HasErrors = true;
+        break;
+      }
+
+      // Build final: IS.CounterVar = IS.Start + IS.NumIters * IS.Step
+      ExprResult Final = BuildCounterUpdate(
+          SemaRef, CurScope, UpdLoc, IS.CounterVar, IS.CounterInit,
+          IS.NumIterations, IS.CounterStep, IS.Subtract);
+      if (!Final.isUsable()) {
+        HasErrors = true;
+        break;
+      }
+
+      // Build Div for the next iteration: Div <- Div * IS.NumIters
+      if (Cnt != 0) {
+        if (Div.isUnset())
+          Div = IS.NumIterations;
+        else
+          Div = SemaRef.BuildBinOp(CurScope, UpdLoc, BO_Mul, Div.get(),
+                                   IS.NumIterations);
+
+        // Add parentheses (for debugging purposes only).
+        if (Div.isUsable())
+          Div = SemaRef.ActOnParenExpr(UpdLoc, UpdLoc, Div.get());
+        if (!Div.isUsable()) {
+          HasErrors = true;
+          break;
+        }
+      }
+      if (!Update.isUsable() || !Final.isUsable()) {
+        HasErrors = true;
+        break;
+      }
+      // Save results
+      Built.Counters[Cnt] = IS.CounterVar;
+      Built.Updates[Cnt] = Update.get();
+      Built.Finals[Cnt] = Final.get();
+    }
+  }
+
+  if (HasErrors)
+    return 0;
+
+  // Save results
+  Built.IterationVarRef = IV.get();
+  Built.LastIteration = LastIteration.get();
+  Built.CalcLastIteration = CalcLastIteration.get();
+  Built.PreCond = PreCond.get();
+  Built.Cond = Cond.get();
+  Built.SeparatedCond = SeparatedCond.get();
+  Built.Init = Init.get();
+  Built.Inc = Inc.get();
+
   return NestedLoopCount;
 }
 
@@ -2333,48 +2787,63 @@
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc,
     llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+  BuiltLoopExprs B;
   // In presence of clause 'collapse', it will define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_simd, GetCollapseNumberExpr(Clauses), AStmt, *this,
-                      *DSAStack, VarsWithImplicitDSA);
+                      *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
+  assert((CurContext->isDependentContext() || B.builtAll()) &&
+         "omp simd loop exprs were not built");
+
   getCurFunction()->setHasBranchProtectedScope();
-  return OMPSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
-                                  Clauses, AStmt);
+  return OMPSimdDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
+      B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
+      B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
 }
 
 StmtResult Sema::ActOnOpenMPForDirective(
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc,
     llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+  BuiltLoopExprs B;
   // In presence of clause 'collapse', it will define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_for, GetCollapseNumberExpr(Clauses), AStmt, *this,
-                      *DSAStack, VarsWithImplicitDSA);
+                      *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
+  assert((CurContext->isDependentContext() || B.builtAll()) &&
+         "omp for loop exprs were not built");
+
   getCurFunction()->setHasBranchProtectedScope();
-  return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
-                                 Clauses, AStmt);
+  return OMPForDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
+      B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
+      B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
 }
 
 StmtResult Sema::ActOnOpenMPForSimdDirective(
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc,
     llvm::DenseMap<VarDecl *, Expr *> &VarsWithImplicitDSA) {
+  BuiltLoopExprs B;
   // In presence of clause 'collapse', it will define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_for_simd, GetCollapseNumberExpr(Clauses), AStmt,
-                      *this, *DSAStack, VarsWithImplicitDSA);
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
-  return OMPForSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
-                                     Clauses, AStmt);
+  return OMPForSimdDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
+      B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
+      B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
 }
 
 StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
@@ -2467,16 +2936,22 @@
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
 
+  BuiltLoopExprs B;
   // In presence of clause 'collapse', it will define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_parallel_for, GetCollapseNumberExpr(Clauses), AStmt,
-                      *this, *DSAStack, VarsWithImplicitDSA);
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
+  assert((CurContext->isDependentContext() || B.builtAll()) &&
+         "omp parallel for loop exprs were not built");
+
   getCurFunction()->setHasBranchProtectedScope();
-  return OMPParallelForDirective::Create(Context, StartLoc, EndLoc,
-                                         NestedLoopCount, Clauses, AStmt);
+  return OMPParallelForDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
+      B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
+      B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
 }
 
 StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
@@ -2492,16 +2967,19 @@
   // longjmp() and throw() must not violate the entry/exit criteria.
   CS->getCapturedDecl()->setNothrow();
 
+  BuiltLoopExprs B;
   // In presence of clause 'collapse', it will define the nested loops number.
   unsigned NestedLoopCount =
       CheckOpenMPLoop(OMPD_parallel_for_simd, GetCollapseNumberExpr(Clauses),
-                      AStmt, *this, *DSAStack, VarsWithImplicitDSA);
+                      AStmt, *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
   getCurFunction()->setHasBranchProtectedScope();
-  return OMPParallelForSimdDirective::Create(Context, StartLoc, EndLoc,
-                                             NestedLoopCount, Clauses, AStmt);
+  return OMPParallelForSimdDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt,
+      B.IterationVarRef, B.LastIteration, B.CalcLastIteration, B.PreCond,
+      B.Cond, B.SeparatedCond, B.Init, B.Inc, B.Counters, B.Updates, B.Finals);
 }
 
 StmtResult