//===- Unroll.cpp - Code to perform loop unrolling ------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements loop unrolling.
//
//===----------------------------------------------------------------------===//
21
Tatiana Shpeismande8829f2018-08-24 23:38:14 -070022#include "mlir/IR/AffineExpr.h"
Uday Bondhugula15984952018-08-01 22:36:12 -070023#include "mlir/IR/Attributes.h"
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070024#include "mlir/IR/Builders.h"
25#include "mlir/IR/CFGFunction.h"
26#include "mlir/IR/MLFunction.h"
27#include "mlir/IR/Module.h"
28#include "mlir/IR/OperationSet.h"
Uday Bondhugula84b80952018-08-03 13:22:26 -070029#include "mlir/IR/StandardOps.h"
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070030#include "mlir/IR/Statements.h"
31#include "mlir/IR/StmtVisitor.h"
Uday Bondhugula6c1f6602018-08-13 17:25:13 -070032#include "mlir/Transforms/Pass.h"
Chris Lattneree0c2ae2018-07-29 12:37:35 -070033#include "mlir/Transforms/Passes.h"
Chris Lattnere787b322018-08-08 11:14:57 -070034#include "llvm/ADT/DenseMap.h"
Uday Bondhugula67701712018-08-21 16:01:23 -070035#include "llvm/Support/CommandLine.h"
Uday Bondhugula081d9e72018-07-27 10:58:14 -070036#include "llvm/Support/raw_ostream.h"
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070037
38using namespace mlir;
Uday Bondhugula67701712018-08-21 16:01:23 -070039using namespace llvm;
40
41// Loop unrolling factor.
42static llvm::cl::opt<unsigned>
43 clUnrollFactor("unroll-factor", cl::Hidden,
44 cl::desc("Use this unroll factor for all loops"));
45
46static llvm::cl::opt<bool> clUnrollFull("unroll-full", cl::Hidden,
47 cl::desc("Fully unroll loops"));
48
49static llvm::cl::opt<unsigned> clUnrollFullThreshold(
50 "unroll-full-threshold", cl::Hidden,
51 cl::desc("Unroll all loops with trip count less than or equal to this"));
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070052
53namespace {
Uday Bondhugula67701712018-08-21 16:01:23 -070054/// Loop unrolling pass. Unrolls all innermost loops unless full unrolling and a
55/// full unroll threshold was specified, in which case, fully unrolls all loops
56/// with trip count less than the specified threshold. The latter is for testing
57/// purposes, especially for testing outer loop unrolling.
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070058struct LoopUnroll : public MLFunctionPass {
Uday Bondhugula67701712018-08-21 16:01:23 -070059 Optional<unsigned> unrollFactor;
60 Optional<bool> unrollFull;
Uday Bondhugula0077e622018-08-16 13:51:44 -070061
Uday Bondhugula67701712018-08-21 16:01:23 -070062 explicit LoopUnroll(Optional<unsigned> unrollFactor,
63 Optional<bool> unrollFull)
64 : unrollFactor(unrollFactor), unrollFull(unrollFull) {}
65
Uday Bondhugula134154e2018-08-06 18:40:34 -070066 void runOnMLFunction(MLFunction *f) override;
Uday Bondhugula67701712018-08-21 16:01:23 -070067 /// Unroll this for stmt. Returns false if nothing was done.
68 bool runOnForStmt(ForStmt *forStmt);
69 bool loopUnrollFull(ForStmt *forStmt);
70 bool loopUnrollByFactor(ForStmt *forStmt, unsigned unrollFactor);
Uday Bondhugula134154e2018-08-06 18:40:34 -070071};
Chris Lattneree0c2ae2018-07-29 12:37:35 -070072} // end anonymous namespace
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070073
Uday Bondhugula67701712018-08-21 16:01:23 -070074MLFunctionPass *mlir::createLoopUnrollPass(int unrollFactor, int unrollFull) {
75 return new LoopUnroll(unrollFactor == -1 ? None
76 : Optional<unsigned>(unrollFactor),
77 unrollFull == -1 ? None : Optional<bool>(unrollFull));
Uday Bondhugula134154e2018-08-06 18:40:34 -070078}
79
Chris Lattneree0c2ae2018-07-29 12:37:35 -070080void LoopUnroll::runOnMLFunction(MLFunction *f) {
Uday Bondhugula081d9e72018-07-27 10:58:14 -070081 // Gathers all innermost loops through a post order pruned walk.
Uday Bondhugula081d9e72018-07-27 10:58:14 -070082 class InnermostLoopGatherer : public StmtWalker<InnermostLoopGatherer, bool> {
83 public:
84 // Store innermost loops as we walk.
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070085 std::vector<ForStmt *> loops;
Uday Bondhugula081d9e72018-07-27 10:58:14 -070086
87 // This method specialized to encode custom return logic.
88 typedef llvm::iplist<Statement> StmtListType;
Uday Bondhugula8572d1a2018-07-30 10:49:49 -070089 bool walkPostOrder(StmtListType::iterator Start,
90 StmtListType::iterator End) {
Uday Bondhugula15984952018-08-01 22:36:12 -070091 bool hasInnerLoops = false;
92 // We need to walk all elements since all innermost loops need to be
93 // gathered as opposed to determining whether this list has any inner
94 // loops or not.
Uday Bondhugula081d9e72018-07-27 10:58:14 -070095 while (Start != End)
Uday Bondhugula15984952018-08-01 22:36:12 -070096 hasInnerLoops |= walkPostOrder(&(*Start++));
97 return hasInnerLoops;
Uday Bondhugula0b4059b2018-07-24 20:01:16 -070098 }
Uday Bondhugula081d9e72018-07-27 10:58:14 -070099
Uday Bondhugula8572d1a2018-07-30 10:49:49 -0700100 bool walkForStmtPostOrder(ForStmt *forStmt) {
101 bool hasInnerLoops = walkPostOrder(forStmt->begin(), forStmt->end());
Uday Bondhugula081d9e72018-07-27 10:58:14 -0700102 if (!hasInnerLoops)
103 loops.push_back(forStmt);
104 return true;
105 }
106
Uday Bondhugula8572d1a2018-07-30 10:49:49 -0700107 bool walkIfStmtPostOrder(IfStmt *ifStmt) {
Chris Lattnere787b322018-08-08 11:14:57 -0700108 bool hasInnerLoops =
109 walkPostOrder(ifStmt->getThen()->begin(), ifStmt->getThen()->end());
110 hasInnerLoops |=
111 walkPostOrder(ifStmt->getElse()->begin(), ifStmt->getElse()->end());
Uday Bondhugula15984952018-08-01 22:36:12 -0700112 return hasInnerLoops;
Uday Bondhugula081d9e72018-07-27 10:58:14 -0700113 }
114
Uday Bondhugula134154e2018-08-06 18:40:34 -0700115 bool visitOperationStmt(OperationStmt *opStmt) { return false; }
Uday Bondhugula081d9e72018-07-27 10:58:14 -0700116
Uday Bondhugula134154e2018-08-06 18:40:34 -0700117 // FIXME: can't use base class method for this because that in turn would
118 // need to use the derived class method above. CRTP doesn't allow it, and
119 // the compiler error resulting from it is also misleading.
Uday Bondhugula8572d1a2018-07-30 10:49:49 -0700120 using StmtWalker<InnermostLoopGatherer, bool>::walkPostOrder;
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700121 };
122
Uday Bondhugula134154e2018-08-06 18:40:34 -0700123 // Gathers all loops with trip count <= minTripCount.
124 class ShortLoopGatherer : public StmtWalker<ShortLoopGatherer> {
125 public:
126 // Store short loops as we walk.
127 std::vector<ForStmt *> loops;
128 const unsigned minTripCount;
129 ShortLoopGatherer(unsigned minTripCount) : minTripCount(minTripCount) {}
Uday Bondhugula15984952018-08-01 22:36:12 -0700130
Uday Bondhugula134154e2018-08-06 18:40:34 -0700131 void visitForStmt(ForStmt *forStmt) {
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700132 if (!forStmt->hasConstantBounds())
133 return;
134 auto lb = forStmt->getConstantLowerBound();
135 auto ub = forStmt->getConstantUpperBound();
Uday Bondhugula67701712018-08-21 16:01:23 -0700136 auto step = forStmt->getStep();
Uday Bondhugula15984952018-08-01 22:36:12 -0700137
Uday Bondhugula134154e2018-08-06 18:40:34 -0700138 if ((ub - lb) / step + 1 <= minTripCount)
139 loops.push_back(forStmt);
Uday Bondhugula15984952018-08-01 22:36:12 -0700140 }
141 };
142
Uday Bondhugula67701712018-08-21 16:01:23 -0700143 if (clUnrollFull.getNumOccurrences() > 0 &&
144 clUnrollFullThreshold.getNumOccurrences() > 0) {
145 ShortLoopGatherer slg(clUnrollFullThreshold);
146 // Do a post order walk so that loops are gathered from innermost to
147 // outermost (or else unrolling an outer one may delete gathered inner
148 // ones).
149 slg.walkPostOrder(f);
150 auto &loops = slg.loops;
151 for (auto *forStmt : loops)
152 loopUnrollFull(forStmt);
153 return;
154 }
155
156 InnermostLoopGatherer ilg;
157 ilg.walkPostOrder(f);
158 auto &loops = ilg.loops;
Uday Bondhugula134154e2018-08-06 18:40:34 -0700159 for (auto *forStmt : loops)
160 runOnForStmt(forStmt);
161}
162
Uday Bondhugula67701712018-08-21 16:01:23 -0700163/// Unroll a for stmt. Default unroll factor is 4.
164bool LoopUnroll::runOnForStmt(ForStmt *forStmt) {
Uday Bondhugula6cd35022018-08-28 18:24:27 -0700165 // Unroll by the factor passed, if any.
166 if (unrollFactor.hasValue())
167 return loopUnrollByFactor(forStmt, unrollFactor.getValue());
168 // Unroll by the command line factor if one was specified.
169 if (clUnrollFactor.getNumOccurrences() > 0)
170 return loopUnrollByFactor(forStmt, clUnrollFactor);
Uday Bondhugula67701712018-08-21 16:01:23 -0700171 // Unroll completely if full loop unroll was specified.
172 if (clUnrollFull.getNumOccurrences() > 0 ||
173 (unrollFull.hasValue() && unrollFull.getValue()))
174 return loopUnrollFull(forStmt);
175
Uday Bondhugula67701712018-08-21 16:01:23 -0700176 // Unroll by four otherwise.
177 return loopUnrollByFactor(forStmt, 4);
178}
179
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700180// Unrolls this loop completely. Fails assertion if loop bounds are
181// non-constant.
Uday Bondhugula67701712018-08-21 16:01:23 -0700182bool LoopUnroll::loopUnrollFull(ForStmt *forStmt) {
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700183 auto lb = forStmt->getConstantLowerBound();
184 auto ub = forStmt->getConstantUpperBound();
Uday Bondhugula67701712018-08-21 16:01:23 -0700185 auto step = forStmt->getStep();
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700186
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700187 // Builder to add constants needed for the unrolled iterator.
Chris Lattnere787b322018-08-08 11:14:57 -0700188 auto *mlFunc = forStmt->findFunction();
189 MLFuncBuilder funcTopBuilder(&mlFunc->front());
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700190
Chris Lattnere787b322018-08-08 11:14:57 -0700191 // Builder to insert the unrolled bodies. We insert right after the
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700192 // ForStmt we're unrolling.
Chris Lattnere787b322018-08-08 11:14:57 -0700193 MLFuncBuilder builder(forStmt->getBlock(), ++StmtBlock::iterator(forStmt));
Uday Bondhugula84b80952018-08-03 13:22:26 -0700194
195 // Unroll the contents of 'forStmt'.
Uday Bondhugula134154e2018-08-06 18:40:34 -0700196 for (int64_t i = lb; i <= ub; i += step) {
Chris Lattnere787b322018-08-08 11:14:57 -0700197 DenseMap<const MLValue *, MLValue *> operandMapping;
198
199 // If the induction variable is used, create a constant for this unrolled
200 // value and add an operand mapping for it.
Uday Bondhugula134154e2018-08-06 18:40:34 -0700201 if (!forStmt->use_empty()) {
Chris Lattnere787b322018-08-08 11:14:57 -0700202 auto *ivConst =
Chris Lattner1628fa02018-08-23 14:32:25 -0700203 funcTopBuilder.create<ConstantAffineIntOp>(forStmt->getLoc(), i)
204 ->getResult();
Chris Lattnere787b322018-08-08 11:14:57 -0700205 operandMapping[forStmt] = cast<MLValue>(ivConst);
Uday Bondhugula134154e2018-08-06 18:40:34 -0700206 }
Uday Bondhugula84b80952018-08-03 13:22:26 -0700207
Chris Lattnere787b322018-08-08 11:14:57 -0700208 // Clone the body of the loop.
209 for (auto &childStmt : *forStmt) {
Uday Bondhugula67701712018-08-21 16:01:23 -0700210 builder.clone(childStmt, operandMapping);
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700211 }
212 }
Uday Bondhugula134154e2018-08-06 18:40:34 -0700213 // Erase the original 'for' stmt from the block.
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700214 forStmt->eraseFromBlock();
Uday Bondhugula67701712018-08-21 16:01:23 -0700215 return true;
216}
217
218/// Unrolls this loop by the specified unroll factor.
219bool LoopUnroll::loopUnrollByFactor(ForStmt *forStmt, unsigned unrollFactor) {
220 assert(unrollFactor >= 1 && "unroll factor shoud be >= 1");
221
222 if (unrollFactor == 1 || forStmt->getStatements().empty())
223 return false;
224
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700225 if (!forStmt->hasConstantBounds())
226 return false;
227
228 auto lb = forStmt->getConstantLowerBound();
229 auto ub = forStmt->getConstantUpperBound();
Uday Bondhugula67701712018-08-21 16:01:23 -0700230 auto step = forStmt->getStep();
231
232 int64_t tripCount = (int64_t)ceilf((ub - lb + 1) / (float)step);
233
234 // If the trip count is lower than the unroll factor, no unrolled body.
235 // TODO(bondhugula): option to specify cleanup loop unrolling.
236 if (tripCount < unrollFactor)
237 return true;
238
239 // Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
240 if (tripCount % unrollFactor) {
241 DenseMap<const MLValue *, MLValue *> operandMap;
242 MLFuncBuilder builder(forStmt->getBlock(), ++StmtBlock::iterator(forStmt));
243 auto *cleanupForStmt = cast<ForStmt>(builder.clone(*forStmt, operandMap));
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700244 cleanupForStmt->setConstantLowerBound(
245 lb + (tripCount - tripCount % unrollFactor) * step);
Uday Bondhugula67701712018-08-21 16:01:23 -0700246 }
247
248 // Builder to insert unrolled bodies right after the last statement in the
249 // body of 'forStmt'.
250 MLFuncBuilder builder(forStmt, StmtBlock::iterator(forStmt->end()));
251 forStmt->setStep(step * unrollFactor);
Tatiana Shpeismande8829f2018-08-24 23:38:14 -0700252 forStmt->setConstantUpperBound(
253 lb + (tripCount - tripCount % unrollFactor - 1) * step);
Uday Bondhugula67701712018-08-21 16:01:23 -0700254
255 // Keep a pointer to the last statement in the original block so that we know
256 // what to clone (since we are doing this in-place).
257 StmtBlock::iterator srcBlockEnd = --forStmt->end();
258
259 // Unroll the contents of 'forStmt' (unrollFactor-1 additional copies
260 // appended).
261 for (unsigned i = 1; i < unrollFactor; i++) {
262 DenseMap<const MLValue *, MLValue *> operandMapping;
263
264 // If the induction variable is used, create a remapping to the value for
265 // this unrolled instance.
266 if (!forStmt->use_empty()) {
267 // iv' = iv + 1/2/3...unrollFactor-1;
268 auto *bumpExpr = builder.getAddExpr(builder.getDimExpr(0),
269 builder.getConstantExpr(i * step));
270 auto *bumpMap = builder.getAffineMap(1, 0, {bumpExpr}, {});
271 auto *ivUnroll =
Chris Lattner1628fa02018-08-23 14:32:25 -0700272 builder.create<AffineApplyOp>(forStmt->getLoc(), bumpMap, forStmt)
273 ->getResult(0);
Uday Bondhugula67701712018-08-21 16:01:23 -0700274 operandMapping[forStmt] = cast<MLValue>(ivUnroll);
275 }
276
277 // Clone the original body of the loop (this doesn't include the last stmt).
278 for (auto it = forStmt->begin(); it != srcBlockEnd; it++) {
279 builder.clone(*it, operandMapping);
280 }
281 // Clone the last statement in the original body.
282 builder.clone(*srcBlockEnd, operandMapping);
283 }
284 return true;
Uday Bondhugula0b4059b2018-07-24 20:01:16 -0700285}