Extend getConstantTripCount to deal with a larger subset of loop bounds; make loop
unroll/unroll-and-jam more powerful; add additional affine expr builder methods

- use previously added analysis/simplification to infer multiple of unroll
  factor trip counts, making loop unroll/unroll-and-jam more general.

- for loop unroll, support bounds that are single result affine map's with the
  same set of operands. For unknown loop bounds, loop unroll will now work as
  long as trip count can be determined to be a multiple of unroll factor.

- extend getConstantTripCount to deal with single result affine map's with the
  same operands. move it to mlir/Analysis/LoopAnalysis.cpp

- add additional builder utility methods for affine expr arithmetic
  (difference, mod/floordiv/ceildiv w.r.t postitive constant). simplify code to
  use the utility methods.

- move affine analysis routines to AffineAnalysis.cpp/.h from
  AffineStructures.cpp/.h.

- Rename LoopUnrollJam to LoopUnrollAndJam to match class name.

- add an additional simplification for simplifyFloorDiv, simplifyCeilDiv

- Rename AffineMap::getNumOperands() getNumInputs: an affine map by itself does
  not have operands. Operands are passed to it through affine_apply, from loop
  bounds/if condition's, etc., operands are stored in the latter.

This should be sufficiently powerful for now as far as unroll/unroll-and-jam go for TPU
code generation, and can move to other analyses/transformations.

Loop nests like these are now unrolled without any cleanup loop being generated.

  for %i = 1 to 100 {
    // unroll factor 4: no cleanup loop will be generated.
    for %j = (d0) -> (d0) (%i) to (d0) -> (5*d0 + 3) (%i) {
      %x = "foo"(%j) : (affineint) -> i32
    }
  }

  for %i = 1 to 100 {
    // unroll factor 4: no cleanup loop will be generated.
    for %j = (d0) -> (d0) (%i) to (d0) -> (d0 - d mod 4 - 1) (%i) {
      %y = "foo"(%j) : (affineint) -> i32
    }
  }

  for %i = 1 to 100 {
    for %j = (d0) -> (d0) (%i) to (d0) -> (d0 + 128) (%i) {
      %x = "foo"() : () -> i32
    }
  }

TODO(bondhugula): extend this to LoopUnrollAndJam as well in the next CL (with minor
changes).

PiperOrigin-RevId: 212661212
diff --git a/include/mlir/Analysis/AffineAnalysis.h b/include/mlir/Analysis/AffineAnalysis.h
new file mode 100644
index 0000000..605791d
--- /dev/null
+++ b/include/mlir/Analysis/AffineAnalysis.h
@@ -0,0 +1,42 @@
+//===- AffineAnalysis.h - analyses for affine structures --------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This header file defines prototypes for methods that perform analysis
+// involving affine structures (AffineExpr, AffineMap, IntegerSet, etc.) and
+// other IR structures that in turn use these.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_AFFINE_ANALYSIS_H
+#define MLIR_ANALYSIS_AFFINE_ANALYSIS_H
+
+#include "llvm/ADT/Optional.h"
+
+namespace mlir {
+
+class AffineExpr;
+class MLIRContext;
+
+/// Simplify an affine expression through flattening and some amount of
+/// simple analysis. This has complexity linear in the number of nodes in
+/// 'expr'. Return nullptr, if the expression can't be simplified.
+AffineExpr *simplifyAffineExpr(AffineExpr *expr, unsigned numDims,
+                               unsigned numSymbols, MLIRContext *context);
+
+} // end namespace mlir
+
+#endif // MLIR_ANALYSIS_AFFINE_ANALYSIS_H
diff --git a/include/mlir/Analysis/AffineStructures.h b/include/mlir/Analysis/AffineStructures.h
index aaae119..ac6e728 100644
--- a/include/mlir/Analysis/AffineStructures.h
+++ b/include/mlir/Analysis/AffineStructures.h
@@ -38,12 +38,6 @@
 class MLValue;
 class HyperRectangularSet;
 
-/// Simplify an affine expression through flattening and some amount of
-/// simple analysis. This has complexity linear in the number of nodes in
-/// 'expr'. Return nullptr, if the expression can't be simplified.
-AffineExpr *simplifyAffineExpr(AffineExpr *expr, unsigned numDims,
-                               unsigned numSymbols, MLIRContext *context);
-
 /// A mutable affine map. Its affine expressions are however unique.
 struct MutableAffineMap {
 public:
diff --git a/include/mlir/Analysis/LoopAnalysis.h b/include/mlir/Analysis/LoopAnalysis.h
new file mode 100644
index 0000000..482a74c
--- /dev/null
+++ b/include/mlir/Analysis/LoopAnalysis.h
@@ -0,0 +1,49 @@
+//===- LoopAnalysis.h - loop analysis methods -------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This header file defines prototypes for methods to analyze loops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_LOOP_ANALYSIS_H
+#define MLIR_ANALYSIS_LOOP_ANALYSIS_H
+
+#include "llvm/ADT/Optional.h"
+
+namespace mlir {
+
+class AffineExpr;
+class ForStmt;
+
+/// Returns the trip count of the loop as an affine expression if the latter is
+/// expressible as an affine expression, and nullptr otherwise. The trip count
+/// expression is simplified before returning.
+AffineExpr *getTripCount(const ForStmt &forStmt);
+
+/// Returns the trip count of the loop if it's a constant, None otherwise. This
+/// uses affine expression analysis and is able to determine constant trip count
+/// in non-trivial cases.
+llvm::Optional<uint64_t> getConstantTripCount(const ForStmt &forStmt);
+
+/// Returns the greatest known integral divisor of the trip count. Affine
+/// expression analysis is used (indirectly through getTripCount), and
+/// this method is thus able to determine non-trivial divisors.
+uint64_t getLargestDivisorOfTripCount(const ForStmt &forStmt);
+
+} // end namespace mlir
+
+#endif // MLIR_ANALYSIS_LOOP_ANALYSIS_H
diff --git a/include/mlir/IR/AffineExpr.h b/include/mlir/IR/AffineExpr.h
index dccbe91..f2f3a9f 100644
--- a/include/mlir/IR/AffineExpr.h
+++ b/include/mlir/IR/AffineExpr.h
@@ -70,8 +70,8 @@
   /// floordiv, ceildiv, and mod is only allowed w.r.t constants.
   bool isPureAffine() const;
 
-  /// Returns the greatest known common divisor of this affine expression.
-  uint64_t getKnownGcd() const;
+  /// Returns the greatest known integral divisor of this affine expression.
+  uint64_t getLargestKnownDivisor() const;
 
   /// Return true if the affine expression is a multiple of 'factor'.
   bool isMultipleOf(int64_t factor) const;
@@ -107,6 +107,11 @@
                             MLIRContext *context) {
     return get(AffineExpr::Kind::Add, lhs, rhs, context);
   }
+  static AffineExpr *getAdd(AffineExpr *expr, int64_t rhs,
+                            MLIRContext *context);
+  static AffineExpr *getSub(AffineExpr *lhs, AffineExpr *rhs,
+                            MLIRContext *context);
+
   static AffineExpr *getMul(AffineExpr *lhs, AffineExpr *rhs,
                             MLIRContext *context) {
     return get(AffineExpr::Kind::Mul, lhs, rhs, context);
@@ -119,6 +124,8 @@
                                 MLIRContext *context) {
     return get(AffineExpr::Kind::CeilDiv, lhs, rhs, context);
   }
+  static AffineExpr *getCeilDiv(AffineExpr *lhs, uint64_t rhs,
+                                MLIRContext *context);
   static AffineExpr *getMod(AffineExpr *lhs, AffineExpr *rhs,
                             MLIRContext *context) {
     return get(AffineExpr::Kind::Mod, lhs, rhs, context);
diff --git a/include/mlir/IR/AffineMap.h b/include/mlir/IR/AffineMap.h
index 9f3ffee..1f5d497 100644
--- a/include/mlir/IR/AffineMap.h
+++ b/include/mlir/IR/AffineMap.h
@@ -69,7 +69,7 @@
   unsigned getNumDims() const { return numDims; }
   unsigned getNumSymbols() const { return numSymbols; }
   unsigned getNumResults() const { return numResults; }
-  unsigned getNumOperands() const { return numDims + numSymbols; }
+  unsigned getNumInputs() const { return numDims + numSymbols; }
 
   ArrayRef<AffineExpr *> getResults() const {
     return ArrayRef<AffineExpr *>(results, numResults);
diff --git a/include/mlir/IR/Builders.h b/include/mlir/IR/Builders.h
index f6cd245..7cd0dd5 100644
--- a/include/mlir/IR/Builders.h
+++ b/include/mlir/IR/Builders.h
@@ -106,9 +106,13 @@
   AffineExpr *getAddExpr(AffineExpr *lhs, AffineExpr *rhs);
   AffineExpr *getSubExpr(AffineExpr *lhs, AffineExpr *rhs);
   AffineExpr *getMulExpr(AffineExpr *lhs, AffineExpr *rhs);
+  AffineExpr *getMulExpr(AffineExpr *lhs, int64_t rhs);
   AffineExpr *getModExpr(AffineExpr *lhs, AffineExpr *rhs);
+  AffineExpr *getModExpr(AffineExpr *lhs, uint64_t rhs);
   AffineExpr *getFloorDivExpr(AffineExpr *lhs, AffineExpr *rhs);
+  AffineExpr *getFloorDivExpr(AffineExpr *lhs, uint64_t rhs);
   AffineExpr *getCeilDivExpr(AffineExpr *lhs, AffineExpr *rhs);
+  AffineExpr *getCeilDivExpr(AffineExpr *lhs, uint64_t rhs);
 
   // Integer set.
   IntegerSet *getIntegerSet(unsigned dimCount, unsigned symbolCount,
diff --git a/include/mlir/IR/Statement.h b/include/mlir/IR/Statement.h
index 55a0aec..3b2bbeb 100644
--- a/include/mlir/IR/Statement.h
+++ b/include/mlir/IR/Statement.h
@@ -110,6 +110,7 @@
     return operand_iterator(this, getNumOperands());
   }
 
+  /// Returns an iterator on the underlying MLValue's (MLValue *).
   llvm::iterator_range<operand_iterator> getOperands() {
     return {operand_begin(), operand_end()};
   }
@@ -126,6 +127,7 @@
     return const_operand_iterator(this, getNumOperands());
   }
 
+  /// Returns a const iterator on the underlying MLValue's (MLValue *).
   llvm::iterator_range<const_operand_iterator> getOperands() const {
     return {operand_begin(), operand_end()};
   }
diff --git a/include/mlir/IR/Statements.h b/include/mlir/IR/Statements.h
index 513bc08..c0de576 100644
--- a/include/mlir/IR/Statements.h
+++ b/include/mlir/IR/Statements.h
@@ -81,6 +81,7 @@
     return operand_iterator(this, getNumOperands());
   }
 
+  /// Returns an iterator on the underlying MLValue's (MLValue *).
   llvm::iterator_range<operand_iterator> getOperands() {
     return {operand_begin(), operand_end()};
   }
@@ -97,6 +98,7 @@
     return const_operand_iterator(this, getNumOperands());
   }
 
+  /// Returns a const iterator on the underlying MLValue's (MLValue *).
   llvm::iterator_range<const_operand_iterator> getOperands() const {
     return {operand_begin(), operand_end()};
   }
@@ -244,6 +246,13 @@
   void setLowerBound(ArrayRef<MLValue *> operands, AffineMap *map);
   /// Set upper bound.
   void setUpperBound(ArrayRef<MLValue *> operands, AffineMap *map);
+
+  /// Set the lower bound map without changing operands.
+  void setLowerBoundMap(AffineMap *map);
+
+  /// Set the upper bound map without changing operands.
+  void setUpperBoundMap(AffineMap *map);
+
   /// Set loop step.
   void setStep(int64_t step) { this->step = step; }
 
@@ -266,9 +275,6 @@
   /// Sets the upper bound to the given constant value.
   void setConstantUpperBound(int64_t value);
 
-  /// Returns the trip count if it's a constant.
-  Optional<uint64_t> getConstantTripCount() const;
-
   //===--------------------------------------------------------------------===//
   // Operands
   //===--------------------------------------------------------------------===//
@@ -361,16 +367,19 @@
     return stmt.getStmtOperand(opStart + idx);
   }
 
-  using operand_iterator = ForStmt::const_operand_iterator;
-  using operand_range = ForStmt::const_operand_range;
+  using operand_iterator = ForStmt::operand_iterator;
+  using operand_range = ForStmt::operand_range;
 
   operand_iterator operand_begin() const {
-    return operand_iterator(&stmt, opStart);
+    // These are iterators over MLValue *. Not casting away const'ness would
+    // require the caller to use const MLValue *.
+    return operand_iterator(const_cast<ForStmt *>(&stmt), opStart);
   }
   operand_iterator operand_end() const {
-    return operand_iterator(&stmt, opEnd);
+    return operand_iterator(const_cast<ForStmt *>(&stmt), opEnd);
   }
 
+  /// Returns an iterator on the underlying MLValue's (MLValue *).
   operand_range getOperands() const { return {operand_begin(), operand_end()}; }
   ArrayRef<StmtOperand> getStmtOperands() const {
     auto ops = stmt.getStmtOperands();