blob: 2fd8fe228b17ea11e58ca258dad3ab25ae944839 [file] [log] [blame]
Chris Lattneree0c2ae2018-07-29 12:37:35 -07001//===- Unroll.cpp - Code to perform loop unrolling ------------------------===//
Uday Bondhugula0b4059b2018-07-24 20:01:16 -07002//
3// Copyright 2019 The MLIR Authors.
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16// =============================================================================
17//
18// This file implements loop unrolling.
19//
20//===----------------------------------------------------------------------===//
21
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -070022#include "mlir/Transforms/Passes.h"
23
24#include "mlir/Analysis/LoopAnalysis.h"
Tatiana Shpeismande8829f2018-08-24 23:38:14 -070025#include "mlir/IR/AffineExpr.h"
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -070026#include "mlir/IR/AffineMap.h"
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070027#include "mlir/IR/Builders.h"
Uday Bondhugula84b80952018-08-03 13:22:26 -070028#include "mlir/IR/StandardOps.h"
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070029#include "mlir/IR/StmtVisitor.h"
Uday Bondhugula6c1f6602018-08-13 17:25:13 -070030#include "mlir/Transforms/Pass.h"
Chris Lattnere787b322018-08-08 11:14:57 -070031#include "llvm/ADT/DenseMap.h"
Uday Bondhugula67701712018-08-21 16:01:23 -070032#include "llvm/Support/CommandLine.h"
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070033
34using namespace mlir;
Uday Bondhugula67701712018-08-21 16:01:23 -070035
36// Loop unrolling factor.
37static llvm::cl::opt<unsigned>
Uday Bondhugula832b17a2018-09-07 14:47:21 -070038 clUnrollFactor("unroll-factor", llvm::cl::Hidden,
39 llvm::cl::desc("Use this unroll factor for all loops"));
Uday Bondhugula67701712018-08-21 16:01:23 -070040
Uday Bondhugula832b17a2018-09-07 14:47:21 -070041static llvm::cl::opt<bool> clUnrollFull("unroll-full", llvm::cl::Hidden,
42 llvm::cl::desc("Fully unroll loops"));
Uday Bondhugula67701712018-08-21 16:01:23 -070043
44static llvm::cl::opt<unsigned> clUnrollFullThreshold(
Uday Bondhugula832b17a2018-09-07 14:47:21 -070045 "unroll-full-threshold", llvm::cl::Hidden,
46 llvm::cl::desc(
47 "Unroll all loops with trip count less than or equal to this"));
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070048
49namespace {
Uday Bondhugula67701712018-08-21 16:01:23 -070050/// Loop unrolling pass. Unrolls all innermost loops unless full unrolling and a
51/// full unroll threshold was specified, in which case, fully unrolls all loops
52/// with trip count less than the specified threshold. The latter is for testing
53/// purposes, especially for testing outer loop unrolling.
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070054struct LoopUnroll : public MLFunctionPass {
Uday Bondhugula67701712018-08-21 16:01:23 -070055 Optional<unsigned> unrollFactor;
56 Optional<bool> unrollFull;
Uday Bondhugula0077e622018-08-16 13:51:44 -070057
Uday Bondhugula67701712018-08-21 16:01:23 -070058 explicit LoopUnroll(Optional<unsigned> unrollFactor,
59 Optional<bool> unrollFull)
60 : unrollFactor(unrollFactor), unrollFull(unrollFull) {}
61
Uday Bondhugula134154e2018-08-06 18:40:34 -070062 void runOnMLFunction(MLFunction *f) override;
Uday Bondhugula67701712018-08-21 16:01:23 -070063 /// Unroll this for stmt. Returns false if nothing was done.
64 bool runOnForStmt(ForStmt *forStmt);
65 bool loopUnrollFull(ForStmt *forStmt);
Uday Bondhugula832b17a2018-09-07 14:47:21 -070066 bool loopUnrollByFactor(ForStmt *forStmt, uint64_t unrollFactor);
Uday Bondhugula134154e2018-08-06 18:40:34 -070067};
Chris Lattneree0c2ae2018-07-29 12:37:35 -070068} // end anonymous namespace
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070069
Uday Bondhugula67701712018-08-21 16:01:23 -070070MLFunctionPass *mlir::createLoopUnrollPass(int unrollFactor, int unrollFull) {
71 return new LoopUnroll(unrollFactor == -1 ? None
72 : Optional<unsigned>(unrollFactor),
73 unrollFull == -1 ? None : Optional<bool>(unrollFull));
Uday Bondhugula134154e2018-08-06 18:40:34 -070074}
75
Chris Lattneree0c2ae2018-07-29 12:37:35 -070076void LoopUnroll::runOnMLFunction(MLFunction *f) {
Uday Bondhugula081d9e72018-07-27 10:58:14 -070077 // Gathers all innermost loops through a post order pruned walk.
Uday Bondhugula081d9e72018-07-27 10:58:14 -070078 class InnermostLoopGatherer : public StmtWalker<InnermostLoopGatherer, bool> {
79 public:
80 // Store innermost loops as we walk.
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070081 std::vector<ForStmt *> loops;
Uday Bondhugula081d9e72018-07-27 10:58:14 -070082
83 // This method specialized to encode custom return logic.
84 typedef llvm::iplist<Statement> StmtListType;
Uday Bondhugula8572d1a2018-07-30 10:49:49 -070085 bool walkPostOrder(StmtListType::iterator Start,
86 StmtListType::iterator End) {
Uday Bondhugula15984952018-08-01 22:36:12 -070087 bool hasInnerLoops = false;
88 // We need to walk all elements since all innermost loops need to be
89 // gathered as opposed to determining whether this list has any inner
90 // loops or not.
Uday Bondhugula081d9e72018-07-27 10:58:14 -070091 while (Start != End)
Uday Bondhugula15984952018-08-01 22:36:12 -070092 hasInnerLoops |= walkPostOrder(&(*Start++));
93 return hasInnerLoops;
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070094 }
Uday Bondhugula081d9e72018-07-27 10:58:14 -070095
Uday Bondhugula8572d1a2018-07-30 10:49:49 -070096 bool walkForStmtPostOrder(ForStmt *forStmt) {
97 bool hasInnerLoops = walkPostOrder(forStmt->begin(), forStmt->end());
Uday Bondhugula081d9e72018-07-27 10:58:14 -070098 if (!hasInnerLoops)
99 loops.push_back(forStmt);
100 return true;
101 }
102
Uday Bondhugula8572d1a2018-07-30 10:49:49 -0700103 bool walkIfStmtPostOrder(IfStmt *ifStmt) {
Chris Lattnere787b322018-08-08 11:14:57 -0700104 bool hasInnerLoops =
105 walkPostOrder(ifStmt->getThen()->begin(), ifStmt->getThen()->end());
106 hasInnerLoops |=
107 walkPostOrder(ifStmt->getElse()->begin(), ifStmt->getElse()->end());
Uday Bondhugula15984952018-08-01 22:36:12 -0700108 return hasInnerLoops;
Uday Bondhugula081d9e72018-07-27 10:58:14 -0700109 }
110
Uday Bondhugula134154e2018-08-06 18:40:34 -0700111 bool visitOperationStmt(OperationStmt *opStmt) { return false; }
Uday Bondhugula081d9e72018-07-27 10:58:14 -0700112
Uday Bondhugula134154e2018-08-06 18:40:34 -0700113 // FIXME: can't use base class method for this because that in turn would
114 // need to use the derived class method above. CRTP doesn't allow it, and
115 // the compiler error resulting from it is also misleading.
Uday Bondhugula8572d1a2018-07-30 10:49:49 -0700116 using StmtWalker<InnermostLoopGatherer, bool>::walkPostOrder;
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700117 };
118
Uday Bondhugula134154e2018-08-06 18:40:34 -0700119 // Gathers all loops with trip count <= minTripCount.
120 class ShortLoopGatherer : public StmtWalker<ShortLoopGatherer> {
121 public:
122 // Store short loops as we walk.
123 std::vector<ForStmt *> loops;
124 const unsigned minTripCount;
125 ShortLoopGatherer(unsigned minTripCount) : minTripCount(minTripCount) {}
Uday Bondhugula15984952018-08-01 22:36:12 -0700126
Uday Bondhugula134154e2018-08-06 18:40:34 -0700127 void visitForStmt(ForStmt *forStmt) {
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700128 Optional<uint64_t> tripCount = getConstantTripCount(*forStmt);
Uday Bondhugula832b17a2018-09-07 14:47:21 -0700129 if (tripCount.hasValue() && tripCount.getValue() <= minTripCount)
Uday Bondhugula134154e2018-08-06 18:40:34 -0700130 loops.push_back(forStmt);
Uday Bondhugula15984952018-08-01 22:36:12 -0700131 }
132 };
133
Uday Bondhugula67701712018-08-21 16:01:23 -0700134 if (clUnrollFull.getNumOccurrences() > 0 &&
135 clUnrollFullThreshold.getNumOccurrences() > 0) {
136 ShortLoopGatherer slg(clUnrollFullThreshold);
137 // Do a post order walk so that loops are gathered from innermost to
138 // outermost (or else unrolling an outer one may delete gathered inner
139 // ones).
140 slg.walkPostOrder(f);
141 auto &loops = slg.loops;
142 for (auto *forStmt : loops)
143 loopUnrollFull(forStmt);
144 return;
145 }
146
147 InnermostLoopGatherer ilg;
148 ilg.walkPostOrder(f);
149 auto &loops = ilg.loops;
Uday Bondhugula134154e2018-08-06 18:40:34 -0700150 for (auto *forStmt : loops)
151 runOnForStmt(forStmt);
152}
153
Uday Bondhugula67701712018-08-21 16:01:23 -0700154/// Unroll a for stmt. Default unroll factor is 4.
155bool LoopUnroll::runOnForStmt(ForStmt *forStmt) {
Uday Bondhugula6cd35022018-08-28 18:24:27 -0700156 // Unroll by the factor passed, if any.
157 if (unrollFactor.hasValue())
158 return loopUnrollByFactor(forStmt, unrollFactor.getValue());
159 // Unroll by the command line factor if one was specified.
160 if (clUnrollFactor.getNumOccurrences() > 0)
161 return loopUnrollByFactor(forStmt, clUnrollFactor);
Uday Bondhugula67701712018-08-21 16:01:23 -0700162 // Unroll completely if full loop unroll was specified.
163 if (clUnrollFull.getNumOccurrences() > 0 ||
164 (unrollFull.hasValue() && unrollFull.getValue()))
165 return loopUnrollFull(forStmt);
166
Uday Bondhugula67701712018-08-21 16:01:23 -0700167 // Unroll by four otherwise.
168 return loopUnrollByFactor(forStmt, 4);
169}
170
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700171// Unrolls this loop completely. Fails assertion if loop bounds are
172// non-constant.
Uday Bondhugula67701712018-08-21 16:01:23 -0700173bool LoopUnroll::loopUnrollFull(ForStmt *forStmt) {
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700174 Optional<uint64_t> tripCount = getConstantTripCount(*forStmt);
Uday Bondhugula832b17a2018-09-07 14:47:21 -0700175 if (tripCount.hasValue())
176 return loopUnrollByFactor(forStmt, tripCount.getValue());
177 return false;
Uday Bondhugula67701712018-08-21 16:01:23 -0700178}
179
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700180/// Returns the upper bound of an unrolled loop with lower bound 'lb' and with
181/// the specified trip count, stride, and unroll factor.
182static AffineMap *getUnrolledLoopUpperBound(AffineMap *lbMap,
183 uint64_t tripCount,
184 unsigned unrollFactor, int64_t step,
185 MLFuncBuilder *builder) {
186 assert(lbMap->getNumResults() == 1);
187 auto *lbExpr = lbMap->getResult(0);
188 // lbExpr + (count - count % unrollFactor - 1) * step).
189 auto *expr = builder->getAddExpr(
190 lbExpr, builder->getConstantExpr(
191 (tripCount - tripCount % unrollFactor - 1) * step));
192 return builder->getAffineMap(lbMap->getNumDims(), lbMap->getNumSymbols(),
193 {expr}, {});
194}
195
196/// Returns the lower bound of the cleanup loop when unrolling a loop with lower
197/// bound 'lb' and with the specified trip count, stride, and unroll factor.
198static AffineMap *getCleanupLoopLowerBound(AffineMap *lbMap, uint64_t tripCount,
199 unsigned unrollFactor, int64_t step,
200 MLFuncBuilder *builder) {
201 assert(lbMap->getNumResults() == 1);
202 auto *lbExpr = lbMap->getResult(0);
203 // lbExpr + (count - count % unrollFactor) * step);
204 auto *expr = builder->getAddExpr(
205 lbExpr,
206 builder->getConstantExpr((tripCount - tripCount % unrollFactor) * step));
207 return builder->getAffineMap(lbMap->getNumDims(), lbMap->getNumSymbols(),
208 {expr}, {});
209}
210
Uday Bondhugula67701712018-08-21 16:01:23 -0700211/// Unrolls this loop by the specified unroll factor.
Uday Bondhugula832b17a2018-09-07 14:47:21 -0700212bool LoopUnroll::loopUnrollByFactor(ForStmt *forStmt, uint64_t unrollFactor) {
Uday Bondhugula67701712018-08-21 16:01:23 -0700213 assert(unrollFactor >= 1 && "unroll factor shoud be >= 1");
214
215 if (unrollFactor == 1 || forStmt->getStatements().empty())
216 return false;
217
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700218 Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(*forStmt);
219
220 if (!mayBeConstantTripCount.hasValue() &&
221 getLargestDivisorOfTripCount(*forStmt) % unrollFactor != 0)
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700222 return false;
223
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700224 const AffineBound &lb = forStmt->getLowerBound();
225 const AffineBound &ub = forStmt->getLowerBound();
226 auto lbMap = lb.getMap();
227 auto ubMap = lb.getMap();
228
229 // Loops with max/min expressions won't be unrolled here (the output can't be
230 // expressed as an MLFunction in the general case). However, the right way to
231 // do such unrolling for an MLFunction would be to specialize the loop for the
232 // 'hotspot' case and unroll that hotspot case.
233 if (lbMap->getNumResults() != 1 || ubMap->getNumResults() != 1)
234 return false;
235
236 // TODO(bondhugula): handle bounds with different sets of operands.
237 // Same operand list for now.
238 if (lbMap->getNumDims() != ubMap->getNumDims() ||
239 lbMap->getNumSymbols() != ubMap->getNumSymbols())
240 return false;
241 unsigned i, e = lb.getNumOperands();
242 for (i = 0; i < e; i++) {
243 if (lb.getStmtOperand(i).get() != ub.getStmtOperand(i).get())
244 break;
245 }
246 if (i != e)
247 return false;
248
Uday Bondhugula832b17a2018-09-07 14:47:21 -0700249 int64_t step = forStmt->getStep();
Uday Bondhugula67701712018-08-21 16:01:23 -0700250
251 // If the trip count is lower than the unroll factor, no unrolled body.
252 // TODO(bondhugula): option to specify cleanup loop unrolling.
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700253 if (mayBeConstantTripCount.hasValue() &&
254 mayBeConstantTripCount.getValue() < unrollFactor)
255 return false;
Uday Bondhugula67701712018-08-21 16:01:23 -0700256
257 // Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700258 // If the trip count is unknown, we currently unroll only when the unknown
259 // trip count is known to be a multiple of unroll factor - hence, no cleanup
260 // loop will be necessary in those cases.
261 // TODO(bondhugula): handle generation of cleanup loop for unknown trip count
262 // when it's not known to be a multiple of unroll factor (still for single
263 // result / same operands case).
264 if (mayBeConstantTripCount.hasValue() &&
265 mayBeConstantTripCount.getValue() % unrollFactor != 0) {
266 uint64_t tripCount = mayBeConstantTripCount.getValue();
Uday Bondhugula67701712018-08-21 16:01:23 -0700267 DenseMap<const MLValue *, MLValue *> operandMap;
268 MLFuncBuilder builder(forStmt->getBlock(), ++StmtBlock::iterator(forStmt));
269 auto *cleanupForStmt = cast<ForStmt>(builder.clone(*forStmt, operandMap));
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700270 if (forStmt->hasConstantLowerBound()) {
271 cleanupForStmt->setConstantLowerBound(
272 forStmt->getConstantLowerBound() +
273 (tripCount - tripCount % unrollFactor) * step);
274 } else {
275 cleanupForStmt->setLowerBoundMap(
276 getCleanupLoopLowerBound(forStmt->getLowerBoundMap(), tripCount,
277 unrollFactor, step, &builder));
278 }
Uday Bondhugula832b17a2018-09-07 14:47:21 -0700279 // Promote the loop body up if this has turned into a single iteration loop.
280 promoteIfSingleIteration(cleanupForStmt);
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700281
282 // The upper bound needs to be adjusted.
283 if (forStmt->hasConstantUpperBound()) {
284 forStmt->setConstantUpperBound(
285 forStmt->getConstantLowerBound() +
286 (tripCount - tripCount % unrollFactor - 1) * step);
287 } else {
288 forStmt->setUpperBoundMap(
289 getUnrolledLoopUpperBound(forStmt->getLowerBoundMap(), tripCount,
290 unrollFactor, step, &builder));
291 }
Uday Bondhugula67701712018-08-21 16:01:23 -0700292 }
293
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700294 // Scale the step of loop being unrolled by unroll factor.
295 forStmt->setStep(step * unrollFactor);
296
Uday Bondhugula67701712018-08-21 16:01:23 -0700297 // Builder to insert unrolled bodies right after the last statement in the
298 // body of 'forStmt'.
299 MLFuncBuilder builder(forStmt, StmtBlock::iterator(forStmt->end()));
Uday Bondhugula67701712018-08-21 16:01:23 -0700300
301 // Keep a pointer to the last statement in the original block so that we know
302 // what to clone (since we are doing this in-place).
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700303 StmtBlock::iterator srcBlockEnd = std::prev(forStmt->end());
Uday Bondhugula67701712018-08-21 16:01:23 -0700304
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700305 // Unroll the contents of 'forStmt' (append unrollFactor-1 additional copies).
Uday Bondhugula67701712018-08-21 16:01:23 -0700306 for (unsigned i = 1; i < unrollFactor; i++) {
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700307 DenseMap<const MLValue *, MLValue *> operandMap;
Uday Bondhugula67701712018-08-21 16:01:23 -0700308
309 // If the induction variable is used, create a remapping to the value for
310 // this unrolled instance.
311 if (!forStmt->use_empty()) {
312 // iv' = iv + 1/2/3...unrollFactor-1;
313 auto *bumpExpr = builder.getAddExpr(builder.getDimExpr(0),
314 builder.getConstantExpr(i * step));
315 auto *bumpMap = builder.getAffineMap(1, 0, {bumpExpr}, {});
316 auto *ivUnroll =
Chris Lattner1628fa02018-08-23 14:32:25 -0700317 builder.create<AffineApplyOp>(forStmt->getLoc(), bumpMap, forStmt)
318 ->getResult(0);
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700319 operandMap[forStmt] = cast<MLValue>(ivUnroll);
Uday Bondhugula67701712018-08-21 16:01:23 -0700320 }
321
Uday Bondhugulacf4f4c42018-09-12 10:21:23 -0700322 // Clone the original body of 'forStmt'.
323 for (auto it = forStmt->begin(); it != std::next(srcBlockEnd); it++) {
324 builder.clone(*it, operandMap);
Uday Bondhugula67701712018-08-21 16:01:23 -0700325 }
Uday Bondhugula67701712018-08-21 16:01:23 -0700326 }
Uday Bondhugula832b17a2018-09-07 14:47:21 -0700327
328 // Promote the loop body up if this has turned into a single iteration loop.
329 promoteIfSingleIteration(forStmt);
330
Uday Bondhugula67701712018-08-21 16:01:23 -0700331 return true;
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700332}