//===- LoopUtils.cpp ---- Misc utilities for loop transformation ----------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements miscellaneous loop transformation routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Transforms/LoopUtils.h"

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/LoopOps/LoopOps.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/Function.h"
#include "mlir/Transforms/RegionUtils.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "LoopUtils"

using namespace mlir;
using llvm::SetVector;
using llvm::SmallMapVector;

/// Computes the cleanup loop lower bound of the loop being unrolled with
/// the specified unroll factor; this bound will also be the upper bound of the
/// main part of the unrolled loop. Computes the bound as an AffineMap along
/// with its operands, or sets 'map' to a null map when the trip count can't be
/// expressed as an affine expression.
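///
/// As an illustrative sketch (hypothetical loop, not from a test case):
/// unrolling
///   affine.for %i = 0 to 17
/// by a factor of 4 yields the cleanup lower bound 0 + (17 - 17 mod 4) = 16,
/// so the main unrolled loop runs over [0, 16) and the cleanup loop over
/// [16, 17).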
void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
                                    AffineMap *map,
                                    SmallVectorImpl<Value> *operands,
                                    OpBuilder &b) {
  auto lbMap = forOp.getLowerBoundMap();

  // Single result lower bound map only.
  if (lbMap.getNumResults() != 1) {
    *map = AffineMap();
    return;
  }

  AffineMap tripCountMap;
  SmallVector<Value, 4> tripCountOperands;
  buildTripCountMapAndOperands(forOp, &tripCountMap, &tripCountOperands);

  // Sometimes the trip count cannot be expressed as an affine expression.
  if (!tripCountMap) {
    *map = AffineMap();
    return;
  }

  unsigned step = forOp.getStep();
  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
                                    forOp.getLowerBoundOperands());

  // For each upper bound expr, get the range.
  // Eg: affine.for %i = lb to min (ub1, ub2),
  // where tripCountExprs yield (tr1, tr2), we create affine.apply's:
  // lb + tr1 - tr1 % ufactor, lb + tr2 - tr2 % ufactor; the results of all
  // these affine.apply's make up the cleanup loop lower bound.
  SmallVector<AffineExpr, 4> bumpExprs(tripCountMap.getNumResults());
  SmallVector<Value, 4> bumpValues(tripCountMap.getNumResults());
  for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
    auto tripCountExpr = tripCountMap.getResult(i);
    bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
    auto bumpMap = AffineMap::get(tripCountMap.getNumDims(),
                                  tripCountMap.getNumSymbols(), bumpExprs[i]);
    bumpValues[i] =
        b.create<AffineApplyOp>(forOp.getLoc(), bumpMap, tripCountOperands);
  }

  SmallVector<AffineExpr, 4> newUbExprs(tripCountMap.getNumResults());
  for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
    newUbExprs[i] = b.getAffineDimExpr(0) + b.getAffineDimExpr(i + 1);

  operands->clear();
  operands->push_back(lb);
  operands->append(bumpValues.begin(), bumpValues.end());
  *map = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs);
  // Simplify the map + operands.
  fullyComposeAffineMapAndOperands(map, operands);
  *map = simplifyAffineMap(*map);
  canonicalizeMapAndOperands(map, operands);
  // Remove any affine.apply's that became dead from the simplification above.
  for (auto v : bumpValues) {
    if (v.use_empty())
      v.getDefiningOp()->erase();
  }
  if (lb.use_empty())
    lb.erase();
}

/// Promotes the loop body of a forOp to its containing block if the forOp
/// is known to have a single iteration.
// TODO(bondhugula): extend this for arbitrary affine bounds.
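// For example (an illustrative sketch, not from a test case):
//   affine.for %i = 42 to 43 { "foo"(%i) : (index) -> () }
// has its body promoted into the enclosing block, with uses of the IV
// replaced by a 'constant 42 : index' materialized at the top of the
// enclosing function.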
LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
  Optional<uint64_t> tripCount = getConstantTripCount(forOp);
  if (!tripCount.hasValue() || tripCount.getValue() != 1)
    return failure();

  // TODO(mlir-team): there is no builder for a max.
  if (forOp.getLowerBoundMap().getNumResults() != 1)
    return failure();

  // Replace all uses of the IV with its single iteration value.
  auto iv = forOp.getInductionVar();
  Operation *op = forOp.getOperation();
  if (!iv.use_empty()) {
    if (forOp.hasConstantLowerBound()) {
      OpBuilder topBuilder(op->getParentOfType<FuncOp>().getBody());
      auto constOp = topBuilder.create<ConstantIndexOp>(
          forOp.getLoc(), forOp.getConstantLowerBound());
      iv.replaceAllUsesWith(constOp);
    } else {
      AffineBound lb = forOp.getLowerBound();
      SmallVector<Value, 4> lbOperands(lb.operand_begin(), lb.operand_end());
      OpBuilder builder(op->getBlock(), Block::iterator(op));
      if (lb.getMap() == builder.getDimIdentityMap()) {
        // No need to generate an affine.apply.
        iv.replaceAllUsesWith(lbOperands[0]);
      } else {
        auto affineApplyOp = builder.create<AffineApplyOp>(
            op->getLoc(), lb.getMap(), lbOperands);
        iv.replaceAllUsesWith(affineApplyOp);
      }
    }
  }
  // Move the loop body operations, except for the terminator, to the loop's
  // containing block.
  auto *block = op->getBlock();
  forOp.getBody()->getOperations().back().erase();
  block->getOperations().splice(Block::iterator(op),
                                forOp.getBody()->getOperations());
  forOp.erase();
  return success();
}

/// Promotes all single-iteration 'affine.for' ops in the FuncOp, i.e., moves
/// their bodies into their containing Block.
void mlir::promoteSingleIterationLoops(FuncOp f) {
  // Promote any single-iteration loop encountered in a post order walk.
  f.walk([](AffineForOp forOp) { promoteIfSingleIteration(forOp); });
}

/// Generates an 'affine.for' op with the specified lower and upper bounds
/// while generating the right IV remappings for the shifted operations. The
/// operation blocks that go into the loop are specified in instGroupQueue
/// starting from the specified offset, and in that order; the first element of
/// each pair specifies the shift applied to that group of operations; note
/// that the shift is multiplied by the loop step before being applied. Returns
/// a null AffineForOp if the generated loop simplifies to a single iteration
/// one.
static AffineForOp
generateLoop(AffineMap lbMap, AffineMap ubMap,
             const std::vector<std::pair<uint64_t, ArrayRef<Operation *>>>
                 &instGroupQueue,
             unsigned offset, AffineForOp srcForInst, OpBuilder b) {
  SmallVector<Value, 4> lbOperands(srcForInst.getLowerBoundOperands());
  SmallVector<Value, 4> ubOperands(srcForInst.getUpperBoundOperands());

  assert(lbMap.getNumInputs() == lbOperands.size());
  assert(ubMap.getNumInputs() == ubOperands.size());

  auto loopChunk =
      b.create<AffineForOp>(srcForInst.getLoc(), lbOperands, lbMap, ubOperands,
                            ubMap, srcForInst.getStep());
  auto loopChunkIV = loopChunk.getInductionVar();
  auto srcIV = srcForInst.getInductionVar();

  BlockAndValueMapping operandMap;

  OpBuilder bodyBuilder = loopChunk.getBodyBuilder();
  for (auto it = instGroupQueue.begin() + offset, e = instGroupQueue.end();
       it != e; ++it) {
    uint64_t shift = it->first;
    auto insts = it->second;
    // All 'same shift' operations get added with their operands being
    // remapped to results of cloned operations, and any uses of their IV
    // remapped. Generate the remapping if the shift is not zero:
    // remappedIV = newIV - shift.
    if (!srcIV.use_empty() && shift != 0) {
      auto ivRemap = bodyBuilder.create<AffineApplyOp>(
          srcForInst.getLoc(),
          bodyBuilder.getSingleDimShiftAffineMap(
              -static_cast<int64_t>(srcForInst.getStep() * shift)),
          loopChunkIV);
      operandMap.map(srcIV, ivRemap);
    } else {
      operandMap.map(srcIV, loopChunkIV);
    }
    for (auto *op : insts) {
      if (!isa<AffineTerminatorOp>(op))
        bodyBuilder.clone(*op, operandMap);
    }
  }
  if (succeeded(promoteIfSingleIteration(loopChunk)))
    return AffineForOp();
  return loopChunk;
}

/// Skews the operations in the body of an 'affine.for' operation with the
/// specified operation-wise shifts. The shifts are with respect to the
/// original execution order, and are multiplied by the loop 'step' before
/// being applied. A shift of zero for each operation will lead to no change.
// The skewing of operations with respect to one another can be used, for
// example, to allow overlap of asynchronous operations (such as DMA
// communication) with computation, or just relative shifting of operations
// for better register reuse, locality, or parallelism. As such, the shifts
// are typically expected to be at most of the order of the number of
// operations. This method should not be used as a substitute for loop
// distribution/fission. It runs in time linear in the number of operations in
// the body of the for loop, using the 'sweep line' paradigm. This method
// assumes that SSA dominance is preserved; a check for that, as well as for
// memory-based dependence preservation, rests with the users of this method.
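//
// As an illustrative sketch (not from a test case), applying shifts = [0, 1]
// to
//   affine.for %i = 0 to 8 { "produce"(%i) ; "consume"(%i) }
// yields a prologue loop over [0, 1) running only "produce", a steady-state
// loop over [1, 8) running "produce" for iteration %i alongside "consume"
// for iteration %i - 1, and an epilogue loop over [8, 9) running only
// "consume" for the final iteration.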
LogicalResult mlir::instBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
                                 bool unrollPrologueEpilogue) {
  if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
    return success();

  // If the trip counts aren't constant, we would need versioning and
  // conditional guards (or context information to prevent such versioning).
  // The better way to pipeline for such loops is to first tile them and
  // extract constant trip count "full tiles" before applying this.
  auto mayBeConstTripCount = getConstantTripCount(forOp);
  if (!mayBeConstTripCount.hasValue()) {
    LLVM_DEBUG(forOp.emitRemark("non-constant trip count loop not handled"));
    return success();
  }
  uint64_t tripCount = mayBeConstTripCount.getValue();

  assert(isInstwiseShiftValid(forOp, shifts) &&
         "shifts will lead to an invalid transformation\n");

  int64_t step = forOp.getStep();

  unsigned numChildInsts = forOp.getBody()->getOperations().size();

  // Do a linear time (counting) sort for the shifts.
  uint64_t maxShift = 0;
  for (unsigned i = 0; i < numChildInsts; i++) {
    maxShift = std::max(maxShift, shifts[i]);
  }
  // Such large shifts are not the typical use case.
  if (maxShift >= numChildInsts) {
    forOp.emitWarning("not shifting because shifts are unrealistically large");
    return success();
  }

  // An array of operation groups sorted by shift amount; each group has all
  // operations with the same shift in the order in which they appear in the
  // body of the 'affine.for' op.
  std::vector<std::vector<Operation *>> sortedInstGroups(maxShift + 1);
  unsigned pos = 0;
  for (auto &op : *forOp.getBody()) {
    auto shift = shifts[pos++];
    sortedInstGroups[shift].push_back(&op);
  }

  // Unless the shifts have a specific pattern (which actually would be the
  // common use case), prologue and epilogue are not meaningfully defined.
  // Nevertheless, if 'unrollPrologueEpilogue' is set, we will treat the first
  // loop generated as the prologue and the last as epilogue and unroll these
  // fully.
  AffineForOp prologue;
  AffineForOp epilogue;

  // Do a sweep over the sorted shifts while storing open groups in a
  // vector, and generating loop portions as necessary during the sweep. A
  // block of operations is paired with its shift.
  std::vector<std::pair<uint64_t, ArrayRef<Operation *>>> instGroupQueue;

  auto origLbMap = forOp.getLowerBoundMap();
  uint64_t lbShift = 0;
  OpBuilder b(forOp.getOperation());
  for (uint64_t d = 0, e = sortedInstGroups.size(); d < e; ++d) {
    // If nothing is shifted by d, continue.
    if (sortedInstGroups[d].empty())
      continue;
    if (!instGroupQueue.empty()) {
      assert(d >= 1 &&
             "Queue expected to be empty when the first block is found");
      // The interval for which the loop needs to be generated here is:
      // [lbShift, min(lbShift + tripCount, d)) and the body of the
      // loop needs to have all operations in instGroupQueue in that order.
      AffineForOp res;
      if (lbShift + tripCount * step < d * step) {
        res = generateLoop(
            b.getShiftedAffineMap(origLbMap, lbShift),
            b.getShiftedAffineMap(origLbMap, lbShift + tripCount * step),
            instGroupQueue, 0, forOp, b);
        // The entire loop for the queued op groups has been generated; empty
        // the queue.
        instGroupQueue.clear();
        lbShift += tripCount * step;
      } else {
        res = generateLoop(b.getShiftedAffineMap(origLbMap, lbShift),
                           b.getShiftedAffineMap(origLbMap, d), instGroupQueue,
                           0, forOp, b);
        lbShift = d * step;
      }
      if (!prologue && res)
        prologue = res;
      epilogue = res;
    } else {
      // Start of first interval.
      lbShift = d * step;
    }
    // Augment the list of operations that get into the current open interval.
    instGroupQueue.push_back({d, sortedInstGroups[d]});
  }

  // The operation groups left in the queue now need to be processed (FIFO)
  // and their loops completed.
  for (unsigned i = 0, e = instGroupQueue.size(); i < e; ++i) {
    uint64_t ubShift = (instGroupQueue[i].first + tripCount) * step;
    epilogue = generateLoop(b.getShiftedAffineMap(origLbMap, lbShift),
                            b.getShiftedAffineMap(origLbMap, ubShift),
                            instGroupQueue, i, forOp, b);
    lbShift = ubShift;
    if (!prologue)
      prologue = epilogue;
  }

  // Erase the original for op.
  forOp.erase();

  if (unrollPrologueEpilogue && prologue)
    loopUnrollFull(prologue);
  if (unrollPrologueEpilogue && epilogue &&
      epilogue.getOperation() != prologue.getOperation())
    loopUnrollFull(epilogue);

  return success();
}

// Collect perfectly nested loops starting from `rootForOp`. Loops are
// perfectly nested if each loop is the first and only non-terminator operation
// in the parent loop. Collect at most `maxLoops` loops and append them to
// `forOps`.
template <typename T>
void getPerfectlyNestedLoopsImpl(
    SmallVectorImpl<T> &forOps, T rootForOp,
    unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
  for (unsigned i = 0; i < maxLoops; ++i) {
    forOps.push_back(rootForOp);
    Block &body = rootForOp.region().front();
    if (body.begin() != std::prev(body.end(), 2))
      return;

    rootForOp = dyn_cast<T>(&body.front());
    if (!rootForOp)
      return;
  }
}

/// Gets the perfectly nested sequence of loops starting at the root of the
/// loop nest. A loop is perfectly nested iff the first op in its body is
/// another AffineForOp and the second op is a terminator.
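/// For example (an illustrative sketch), for the nest
///   affine.for %i ... { affine.for %j ... { "work"(%i, %j) } }
/// this populates 'nestedLoops' with the %i loop followed by the %j loop.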
void mlir::getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
                                   AffineForOp root) {
  getPerfectlyNestedLoopsImpl(nestedLoops, root);
}

void mlir::getPerfectlyNestedLoops(SmallVectorImpl<loop::ForOp> &nestedLoops,
                                   loop::ForOp root) {
  getPerfectlyNestedLoopsImpl(nestedLoops, root);
}

/// Unrolls this loop completely.
LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.hasValue()) {
    uint64_t tripCount = mayBeConstantTripCount.getValue();
    if (tripCount == 1) {
      return promoteIfSingleIteration(forOp);
    }
    return loopUnrollByFactor(forOp, tripCount);
  }
  return failure();
}

/// Unrolls this loop by the specified factor or by the trip count (if
/// constant), whichever is lower.
LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
                                         uint64_t unrollFactor) {
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);

  if (mayBeConstantTripCount.hasValue() &&
      mayBeConstantTripCount.getValue() < unrollFactor)
    return loopUnrollByFactor(forOp, mayBeConstantTripCount.getValue());
  return loopUnrollByFactor(forOp, unrollFactor);
}

/// Unrolls this loop by the specified factor. Returns success if the loop
/// is successfully unrolled.
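///
/// For example (an illustrative sketch, not from a test case), unrolling
///   affine.for %i = 0 to 10 { "foo"(%i) : (index) -> () }
/// by a factor of 4 yields a main loop over [0, 8) with step 4 whose body
/// runs "foo" on %i, %i + 1, %i + 2, and %i + 3 (the bumped IVs produced by
/// affine.apply), followed by a cleanup loop over [8, 10) with the original
/// body.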
LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
                                       uint64_t unrollFactor) {
  assert(unrollFactor >= 1 && "unroll factor should be >= 1");

  if (unrollFactor == 1)
    return promoteIfSingleIteration(forOp);

  if (forOp.getBody()->empty() ||
      forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
    return failure();

  // Loops where the lower bound is a max expression aren't supported for
  // unrolling since the trip count can't be expressed as an affine function
  // when the lower bound is a multi-result map. However, one meaningful way to
  // do such unrolling would be to specialize the loop for the 'hotspot' case
  // and unroll that hotspot.
  if (forOp.getLowerBoundMap().getNumResults() != 1)
    return failure();

  // If the trip count is lower than the unroll factor, no unrolled body.
  // TODO(bondhugula): option to specify cleanup loop unrolling.
  Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
  if (mayBeConstantTripCount.hasValue() &&
      mayBeConstantTripCount.getValue() < unrollFactor)
    return failure();

  // Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
  Operation *op = forOp.getOperation();
  if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
    OpBuilder builder(op->getBlock(), ++Block::iterator(op));
    auto cleanupForInst = cast<AffineForOp>(builder.clone(*op));
    AffineMap cleanupMap;
    SmallVector<Value, 4> cleanupOperands;
    getCleanupLoopLowerBound(forOp, unrollFactor, &cleanupMap, &cleanupOperands,
                             builder);
    assert(cleanupMap &&
           "cleanup loop lower bound map for single result lower bound maps "
           "can always be determined");
    cleanupForInst.setLowerBound(cleanupOperands, cleanupMap);
    // Promote the loop body up if this has turned into a single iteration
    // loop.
    promoteIfSingleIteration(cleanupForInst);

    // Adjust upper bound of the original loop; this is the same as the lower
    // bound of the cleanup loop.
    forOp.setUpperBound(cleanupOperands, cleanupMap);
  }

  // Scale the step of loop being unrolled by unroll factor.
  int64_t step = forOp.getStep();
  forOp.setStep(step * unrollFactor);

  // Builder to insert unrolled bodies just before the terminator of the body
  // of 'forOp'.
  OpBuilder builder = forOp.getBodyBuilder();

  // Keep a pointer to the last non-terminator operation in the original block
  // so that we know what to clone (since we are doing this in-place).
  Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);

  // Unroll the contents of 'forOp' (append unrollFactor-1 additional copies).
  auto forOpIV = forOp.getInductionVar();
  for (unsigned i = 1; i < unrollFactor; i++) {
    BlockAndValueMapping operandMap;

    // If the induction variable is used, create a remapping to the value for
    // this unrolled instance.
    if (!forOpIV.use_empty()) {
      // iv' = iv + 1/2/3...unrollFactor-1;
      auto d0 = builder.getAffineDimExpr(0);
      auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
      auto ivUnroll =
          builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
      operandMap.map(forOpIV, ivUnroll);
    }

    // Clone the original body of 'forOp'.
    for (auto it = forOp.getBody()->begin(); it != std::next(srcBlockEnd);
         it++) {
      builder.clone(*it, operandMap);
    }
  }

  // Promote the loop body up if this has turned into a single iteration loop.
  promoteIfSingleIteration(forOp);
  return success();
}

/// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
/// nested within 'forOpA' as the only non-terminator operation in its block.
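///
/// For example (an illustrative sketch):
///   affine.for %i ... { affine.for %j ... { "op"(%i, %j) } }
/// becomes
///   affine.for %j ... { affine.for %i ... { "op"(%i, %j) } }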
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
  auto *forOpAInst = forOpA.getOperation();

  assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
  auto &forOpABody = forOpA.getBody()->getOperations();
  auto &forOpBBody = forOpB.getBody()->getOperations();

  // 1) Splice forOpA's non-terminator operations (which is just forOpB) just
  // before forOpA (in forOpA's parent's block); this should leave forOpA's
  // body containing only the terminator.
  forOpAInst->getBlock()->getOperations().splice(Block::iterator(forOpAInst),
                                                 forOpABody, forOpABody.begin(),
                                                 std::prev(forOpABody.end()));
  // 2) Splice forOpB's non-terminator operations into the beginning of
  // forOpA's body (this leaves forOpB's body containing only the terminator).
  forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
                    std::prev(forOpBBody.end()));
  // 3) Splice forOpA into the beginning of forOpB's body.
  forOpBBody.splice(forOpBBody.begin(), forOpAInst->getBlock()->getOperations(),
                    Block::iterator(forOpAInst));
}

// Checks each dependence component against the permutation to see if the
// desired loop interchange would violate dependences by making the
// dependence component lexicographically negative.
static bool checkLoopInterchangeDependences(
    const std::vector<SmallVector<DependenceComponent, 2>> &depCompsVec,
    ArrayRef<AffineForOp> loops, ArrayRef<unsigned> loopPermMap) {
  // Invert permutation map.
  unsigned maxLoopDepth = loops.size();
  SmallVector<unsigned, 4> loopPermMapInv;
  loopPermMapInv.resize(maxLoopDepth);
  for (unsigned i = 0; i < maxLoopDepth; ++i)
    loopPermMapInv[loopPermMap[i]] = i;

  // Check each dependence component against the permutation to see if the
  // desired loop interchange permutation would make the dependence vectors
  // lexicographically negative.
  // Example 1: [-1, 1][0, 0]
  // Example 2: [0, 0][-1, 1]
  for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) {
    const SmallVector<DependenceComponent, 2> &depComps = depCompsVec[i];
    assert(depComps.size() >= maxLoopDepth);
    // Check if the first non-zero dependence component is positive.
    // This iterates through loops in the desired order.
    for (unsigned j = 0; j < maxLoopDepth; ++j) {
      unsigned permIndex = loopPermMapInv[j];
      assert(depComps[permIndex].lb.hasValue());
      int64_t depCompLb = depComps[permIndex].lb.getValue();
      if (depCompLb > 0)
        break;
      if (depCompLb < 0)
        return false;
    }
  }
  return true;
}

/// Checks if the loop interchange permutation 'loopPermMap' of the perfectly
/// nested sequence of loops in 'loops' would violate dependences.
bool mlir::isValidLoopInterchangePermutation(ArrayRef<AffineForOp> loops,
                                             ArrayRef<unsigned> loopPermMap) {
  // Gather dependence components for dependences between all ops in loop nest
  // rooted at 'loops[0]', at loop depths in range [1, maxLoopDepth].
  assert(loopPermMap.size() == loops.size());
  unsigned maxLoopDepth = loops.size();
  std::vector<SmallVector<DependenceComponent, 2>> depCompsVec;
  getDependenceComponents(loops[0], maxLoopDepth, &depCompsVec);
  return checkLoopInterchangeDependences(depCompsVec, loops, loopPermMap);
}

/// Performs a sequence of loop interchanges on the perfectly nested sequence
/// of loops in 'loops', as specified by the permutation 'loopPermMap'.
unsigned mlir::interchangeLoops(ArrayRef<AffineForOp> loops,
                                ArrayRef<unsigned> loopPermMap) {
  Optional<unsigned> loopNestRootIndex;
  for (int i = loops.size() - 1; i >= 0; --i) {
    int permIndex = static_cast<int>(loopPermMap[i]);
    // Store the index of the for loop which will be the new loop nest root.
    if (permIndex == 0)
      loopNestRootIndex = i;
    if (permIndex > i) {
      // Sink loop 'i' by 'permIndex - i' levels deeper into the loop nest.
      sinkLoop(loops[i], permIndex - i);
    }
  }
  assert(loopNestRootIndex.hasValue());
  return loopNestRootIndex.getValue();
}

// Sinks all sequential loops to the innermost levels (while preserving
// relative order among them) and moves all parallel loops to the outermost
// levels (while again preserving relative order among them).
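//
// For example (an illustrative sketch), in a two-deep nest where the outer
// loop carries a dependence and the inner loop is parallel, the permutation
// computed below interchanges the two so that the parallel loop becomes
// outermost; the root of the resulting nest is returned.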
|  | 592 | AffineForOp mlir::sinkSequentialLoops(AffineForOp forOp) { | 
|  | 593 | SmallVector<AffineForOp, 4> loops; | 
|  | 594 | getPerfectlyNestedLoops(loops, forOp); | 
|  | 595 | if (loops.size() < 2) | 
|  | 596 | return forOp; | 
|  | 597 |  | 
|  | 598 | // Gather dependence components for dependences between all ops in loop nest | 
|  | 599 | // rooted at 'loops[0]', at loop depths in range [1, maxLoopDepth]. | 
|  | 600 | unsigned maxLoopDepth = loops.size(); | 
| River Riddle | 4562e38 | 2019-12-18 09:28:48 -0800 | [diff] [blame] | 601 | std::vector<SmallVector<DependenceComponent, 2>> depCompsVec; | 
| Andy Davis | 90d4023 | 2019-05-13 06:57:56 -0700 | [diff] [blame] | 602 | getDependenceComponents(loops[0], maxLoopDepth, &depCompsVec); | 
|  | 603 |  | 
|  | 604 | // Mark loops as either parallel or sequential. | 
| River Riddle | 4562e38 | 2019-12-18 09:28:48 -0800 | [diff] [blame] | 605 | SmallVector<bool, 8> isParallelLoop(maxLoopDepth, true); | 
| Andy Davis | 90d4023 | 2019-05-13 06:57:56 -0700 | [diff] [blame] | 606 | for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) { | 
| River Riddle | 4562e38 | 2019-12-18 09:28:48 -0800 | [diff] [blame] | 607 | SmallVector<DependenceComponent, 2> &depComps = depCompsVec[i]; | 
| Andy Davis | 90d4023 | 2019-05-13 06:57:56 -0700 | [diff] [blame] | 608 | assert(depComps.size() >= maxLoopDepth); | 
|  | 609 | for (unsigned j = 0; j < maxLoopDepth; ++j) { | 
|  | 610 | DependenceComponent &depComp = depComps[j]; | 
|  | 611 | assert(depComp.lb.hasValue() && depComp.ub.hasValue()); | 
|  | 612 | if (depComp.lb.getValue() != 0 || depComp.ub.getValue() != 0) | 
|  | 613 | isParallelLoop[j] = false; | 
|  | 614 | } | 
|  | 615 | } | 
|  | 616 |  | 
|  | 617 | // Count the number of parallel loops. | 
|  | 618 | unsigned numParallelLoops = 0; | 
|  | 619 | for (unsigned i = 0, e = isParallelLoop.size(); i < e; ++i) | 
|  | 620 | if (isParallelLoop[i]) | 
|  | 621 | ++numParallelLoops; | 
|  | 622 |  | 
|  | 623 | // Compute permutation of loops that sinks sequential loops (and thus raises | 
|  | 624 | // parallel loops) while preserving relative order. | 
| River Riddle | 4562e38 | 2019-12-18 09:28:48 -0800 | [diff] [blame] | 625 | SmallVector<unsigned, 4> loopPermMap(maxLoopDepth); | 
| Andy Davis | 90d4023 | 2019-05-13 06:57:56 -0700 | [diff] [blame] | 626 | unsigned nextSequentialLoop = numParallelLoops; | 
|  | 627 | unsigned nextParallelLoop = 0; | 
|  | 628 | for (unsigned i = 0; i < maxLoopDepth; ++i) { | 
|  | 629 | if (isParallelLoop[i]) { | 
|  | 630 | loopPermMap[i] = nextParallelLoop++; | 
|  | 631 | } else { | 
|  | 632 | loopPermMap[i] = nextSequentialLoop++; | 
|  | 633 | } | 
|  | 634 | } | 
|  | 635 |  | 
|  | 636 | // Check if permutation 'loopPermMap' would violate dependences. | 
|  | 637 | if (!checkLoopInterchangeDependences(depCompsVec, loops, loopPermMap)) | 
|  | 638 | return forOp; | 
|  | 639 | // Perform loop interchange according to permutation 'loopPermMap'. | 
|  | 640 | unsigned loopNestRootIndex = interchangeLoops(loops, loopPermMap); | 
|  | 641 | return loops[loopNestRootIndex]; | 
|  | 642 | } | 
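For intuition, a hedged before/after of this transformation on a two-deep nest where only the outer loop carries a dependence (IR shown schematically):

```cpp
// Before: %i carries a dependence (sequential), %j does not (parallel).
//   affine.for %i = 0 to %N {
//     affine.for %j = 0 to %M { ... }
//   }
// After sinkSequentialLoops, the parallel loop is outermost and its
// AffineForOp is returned as the new nest root:
//   affine.for %j = 0 to %M {
//     affine.for %i = 0 to %N { ... }
//   }
```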
|  | 643 |  | 
| MLIR Team | 8f5f2c7 | 2019-02-15 09:32:18 -0800 | [diff] [blame] | 644 | /// Performs a series of loop interchanges to sink 'forOp' 'loopDepth' levels | 
|  | 645 | /// deeper in the loop nest. | 
| Chris Lattner | d9b5bc8 | 2019-03-24 19:53:05 -0700 | [diff] [blame] | 646 | void mlir::sinkLoop(AffineForOp forOp, unsigned loopDepth) { | 
| MLIR Team | 8f5f2c7 | 2019-02-15 09:32:18 -0800 | [diff] [blame] | 647 | for (unsigned i = 0; i < loopDepth; ++i) { | 
| River Riddle | adca3c2 | 2019-05-11 17:57:32 -0700 | [diff] [blame] | 648 | AffineForOp nextForOp = cast<AffineForOp>(forOp.getBody()->front()); | 
| MLIR Team | 8f5f2c7 | 2019-02-15 09:32:18 -0800 | [diff] [blame] | 649 | interchangeLoops(forOp, nextForOp); | 
|  | 650 | } | 
|  | 651 | } | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 652 |  | 
| Nicolas Vasilache | d6c650c | 2019-03-20 10:23:04 -0700 | [diff] [blame] | 653 | // Factors out common behavior to add a new `iv` (resp. `iv` + `offset`) to the | 
| Nicolas Vasilache | fc5bbdd | 2019-03-21 07:32:51 -0700 | [diff] [blame] | 654 | // lower (resp. upper) loop bound. When called for both the lower and upper | 
|  | 655 | // bounds, the resulting IR resembles: | 
| Nicolas Vasilache | d6c650c | 2019-03-20 10:23:04 -0700 | [diff] [blame] | 656 | // | 
|  | 657 | // ```mlir | 
| River Riddle | 832567b | 2019-03-25 10:14:34 -0700 | [diff] [blame] | 658 | //    affine.for %i = max(`iv`, ...) to min(`iv` + `offset`) { | 
| Nicolas Vasilache | fc5bbdd | 2019-03-21 07:32:51 -0700 | [diff] [blame] | 659 | //      ... | 
|  | 660 | //    } | 
|  | 661 | // ``` | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 662 | static void augmentMapAndBounds(OpBuilder &b, Value iv, AffineMap *map, | 
|  | 663 | SmallVector<Value, 4> *operands, | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 664 | int64_t offset = 0) { | 
|  | 665 | auto bounds = llvm::to_vector<4>(map->getResults()); | 
| Nicolas Vasilache | 0804750 | 2019-06-20 15:10:35 -0700 | [diff] [blame] | 666 | bounds.push_back(b.getAffineDimExpr(map->getNumDims()) + offset); | 
| Nicolas Vasilache | d6c650c | 2019-03-20 10:23:04 -0700 | [diff] [blame] | 667 | operands->insert(operands->begin() + map->getNumDims(), iv); | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 668 | *map = AffineMap::get(map->getNumDims() + 1, map->getNumSymbols(), bounds); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 669 | canonicalizeMapAndOperands(map, operands); | 
|  | 670 | } | 
|  | 671 |  | 
|  | 672 | // Stripmines `forOp` by `factor` and sinks it under each of the `targets`. | 
|  | 673 | // Stripmine-sink is a primitive building block for generalized tiling of | 
|  | 674 | // imperfectly nested loops. | 
|  | 675 | // This transformation is purely mechanical and does not check legality, | 
|  | 676 | // profitability or even structural correctness. It is the user's | 
|  | 677 | // responsibility to specify `targets` that are dominated by `forOp`. | 
|  | 678 | // Returns the new AffineForOps, one per target, nested immediately under | 
|  | 679 | // each of the `targets`. | 
| Chris Lattner | d9b5bc8 | 2019-03-24 19:53:05 -0700 | [diff] [blame] | 680 | static SmallVector<AffineForOp, 8> | 
|  | 681 | stripmineSink(AffineForOp forOp, uint64_t factor, | 
|  | 682 | ArrayRef<AffineForOp> targets) { | 
| River Riddle | af1abcc | 2019-03-25 11:13:31 -0700 | [diff] [blame] | 683 | auto originalStep = forOp.getStep(); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 684 | auto scaledStep = originalStep * factor; | 
| River Riddle | af1abcc | 2019-03-25 11:13:31 -0700 | [diff] [blame] | 685 | forOp.setStep(scaledStep); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 686 |  | 
| River Riddle | 99b87c9 | 2019-03-27 14:02:02 -0700 | [diff] [blame] | 687 | auto *op = forOp.getOperation(); | 
| River Riddle | f1b848e | 2019-06-04 19:18:23 -0700 | [diff] [blame] | 688 | OpBuilder b(op->getBlock(), ++Block::iterator(op)); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 689 |  | 
|  | 690 | // Lower-bound map creation. | 
| River Riddle | af1abcc | 2019-03-25 11:13:31 -0700 | [diff] [blame] | 691 | auto lbMap = forOp.getLowerBoundMap(); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 692 | SmallVector<Value, 4> lbOperands(forOp.getLowerBoundOperands()); | 
| Nicolas Vasilache | 0804750 | 2019-06-20 15:10:35 -0700 | [diff] [blame] | 693 | augmentMapAndBounds(b, forOp.getInductionVar(), &lbMap, &lbOperands); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 694 |  | 
|  | 695 | // Upper-bound map creation. | 
| River Riddle | af1abcc | 2019-03-25 11:13:31 -0700 | [diff] [blame] | 696 | auto ubMap = forOp.getUpperBoundMap(); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 697 | SmallVector<Value, 4> ubOperands(forOp.getUpperBoundOperands()); | 
| Nicolas Vasilache | 0804750 | 2019-06-20 15:10:35 -0700 | [diff] [blame] | 698 | augmentMapAndBounds(b, forOp.getInductionVar(), &ubMap, &ubOperands, | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 699 | /*offset=*/scaledStep); | 
|  | 700 |  | 
| River Riddle | 35807bc | 2019-12-22 21:59:55 -0800 | [diff] [blame] | 701 | auto iv = forOp.getInductionVar(); | 
| Chris Lattner | d9b5bc8 | 2019-03-24 19:53:05 -0700 | [diff] [blame] | 702 | SmallVector<AffineForOp, 8> innerLoops; | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 703 | for (auto t : targets) { | 
| Alex Zinenko | 5a5bba0 | 2019-03-27 05:11:58 -0700 | [diff] [blame] | 704 | // Insert newForOp before the terminator of `t`. | 
| River Riddle | f1b848e | 2019-06-04 19:18:23 -0700 | [diff] [blame] | 705 | OpBuilder b = t.getBodyBuilder(); | 
| River Riddle | af1abcc | 2019-03-25 11:13:31 -0700 | [diff] [blame] | 706 | auto newForOp = b.create<AffineForOp>(t.getLoc(), lbOperands, lbMap, | 
| Nicolas Vasilache | d6c650c | 2019-03-20 10:23:04 -0700 | [diff] [blame] | 707 | ubOperands, ubMap, originalStep); | 
| Nicolas Vasilache | d2a8729 | 2019-07-19 05:02:39 -0700 | [diff] [blame] | 708 | auto begin = t.getBody()->begin(); | 
|  | 709 | // Skip the terminator and `newForOp`, which is just before the terminator. | 
|  | 710 | auto nOps = t.getBody()->getOperations().size() - 2; | 
|  | 711 | newForOp.getBody()->getOperations().splice( | 
|  | 712 | newForOp.getBody()->getOperations().begin(), | 
|  | 713 | t.getBody()->getOperations(), begin, std::next(begin, nOps)); | 
|  | 714 | replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(), | 
|  | 715 | newForOp.region()); | 
| Nicolas Vasilache | d6c650c | 2019-03-20 10:23:04 -0700 | [diff] [blame] | 716 | innerLoops.push_back(newForOp); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 717 | } | 
|  | 718 |  | 
|  | 719 | return innerLoops; | 
|  | 720 | } | 
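A hedged sketch of the shape this produces for a single target, with a factor of 32, an original step of 1, and bounds shown schematically (the actual bound maps are built by `augmentMapAndBounds` above):

```cpp
// Before stripmineSink(%i-loop, /*factor=*/32, {%j-loop}):
//   affine.for %i = 0 to %N {
//     affine.for %j = ... {
//       <body using %i>
//     }
//   }
// After: the outer step is scaled by the factor, and a new loop sunk under
// the target takes over all uses of %i:
//   affine.for %i = 0 to %N step 32 {
//     affine.for %j = ... {
//       affine.for %ii = max(0, %i) to min(%N, %i + 32) {
//         <body using %ii>
//       }
//     }
//   }
```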
|  | 721 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 722 | static Loops stripmineSink(loop::ForOp forOp, Value factor, | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 723 | ArrayRef<loop::ForOp> targets) { | 
| River Riddle | 35807bc | 2019-12-22 21:59:55 -0800 | [diff] [blame] | 724 | auto originalStep = forOp.step(); | 
|  | 725 | auto iv = forOp.getInductionVar(); | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 726 |  | 
|  | 727 | OpBuilder b(forOp); | 
|  | 728 | forOp.setStep(b.create<MulIOp>(forOp.getLoc(), originalStep, factor)); | 
|  | 729 |  | 
|  | 730 | Loops innerLoops; | 
|  | 731 | for (auto t : targets) { | 
|  | 732 | // Save information for splicing ops out of `t` when done. | 
|  | 733 | auto begin = t.getBody()->begin(); | 
|  | 734 | auto nOps = t.getBody()->getOperations().size(); | 
|  | 735 |  | 
|  | 736 | // Insert newForOp before the terminator of `t`. | 
|  | 737 | OpBuilder b(t.getBodyBuilder()); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 738 | Value stepped = b.create<AddIOp>(t.getLoc(), iv, forOp.step()); | 
|  | 739 | Value less = b.create<CmpIOp>(t.getLoc(), CmpIPredicate::slt, | 
|  | 740 | forOp.upperBound(), stepped); | 
|  | 741 | Value ub = | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 742 | b.create<SelectOp>(t.getLoc(), less, forOp.upperBound(), stepped); | 
|  | 743 |  | 
|  | 744 | // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses. | 
|  | 745 | auto newForOp = b.create<loop::ForOp>(t.getLoc(), iv, ub, originalStep); | 
|  | 746 | newForOp.getBody()->getOperations().splice( | 
|  | 747 | newForOp.getBody()->getOperations().begin(), | 
|  | 748 | t.getBody()->getOperations(), begin, std::next(begin, nOps - 1)); | 
|  | 749 | replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(), | 
|  | 750 | newForOp.region()); | 
|  | 751 |  | 
|  | 752 | innerLoops.push_back(newForOp); | 
|  | 753 | } | 
|  | 754 |  | 
|  | 755 | return innerLoops; | 
|  | 756 | } | 
|  | 757 |  | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 758 | // Stripmines a `forOp` by `factor` and sinks it under a single `target`. | 
|  | 759 | // Returns the new loop, nested immediately under `target`. | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 760 | template <typename ForType, typename SizeType> | 
|  | 761 | static ForType stripmineSink(ForType forOp, SizeType factor, ForType target) { | 
|  | 762 | // TODO(ntv): Use cheap structural assertions that targets are nested under | 
|  | 763 | // forOp and that targets are not nested under each other when DominanceInfo | 
|  | 764 | // exposes the capability. It seems overkill to construct a whole function | 
|  | 765 | // dominance tree at this point. | 
|  | 766 | auto res = stripmineSink(forOp, factor, ArrayRef<ForType>{target}); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 767 | assert(res.size() == 1 && "Expected 1 inner forOp"); | 
|  | 768 | return res[0]; | 
|  | 769 | } | 
|  | 770 |  | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 771 | template <typename ForType, typename SizeType> | 
|  | 772 | static SmallVector<SmallVector<ForType, 8>, 8> | 
|  | 773 | tileImpl(ArrayRef<ForType> forOps, ArrayRef<SizeType> sizes, | 
|  | 774 | ArrayRef<ForType> targets) { | 
|  | 775 | SmallVector<SmallVector<ForType, 8>, 8> res; | 
|  | 776 | SmallVector<ForType, 8> currentTargets(targets.begin(), targets.end()); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 777 | for (auto it : llvm::zip(forOps, sizes)) { | 
|  | 778 | auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets); | 
|  | 779 | res.push_back(step); | 
|  | 780 | currentTargets = step; | 
|  | 781 | } | 
|  | 782 | return res; | 
|  | 783 | } | 
|  | 784 |  | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 785 | SmallVector<SmallVector<AffineForOp, 8>, 8> | 
|  | 786 | mlir::tile(ArrayRef<AffineForOp> forOps, ArrayRef<uint64_t> sizes, | 
|  | 787 | ArrayRef<AffineForOp> targets) { | 
|  | 788 | return tileImpl(forOps, sizes, targets); | 
|  | 789 | } | 
|  | 790 |  | 
|  | 791 | SmallVector<Loops, 8> mlir::tile(ArrayRef<loop::ForOp> forOps, | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 792 | ArrayRef<Value> sizes, | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 793 | ArrayRef<loop::ForOp> targets) { | 
|  | 794 | return tileImpl(forOps, sizes, targets); | 
|  | 795 | } | 
|  | 796 |  | 
|  | 797 | template <typename ForType, typename SizeType> | 
|  | 798 | static SmallVector<ForType, 8> | 
|  | 799 | tileImpl(ArrayRef<ForType> forOps, ArrayRef<SizeType> sizes, ForType target) { | 
|  | 800 | SmallVector<ForType, 8> res; | 
|  | 801 | for (auto loops : tile(forOps, sizes, ArrayRef<ForType>{target})) { | 
|  | 802 | assert(loops.size() == 1); | 
|  | 803 | res.push_back(loops[0]); | 
|  | 804 | } | 
|  | 805 | return res; | 
|  | 806 | } | 
|  | 807 |  | 
| Chris Lattner | d9b5bc8 | 2019-03-24 19:53:05 -0700 | [diff] [blame] | 808 | SmallVector<AffineForOp, 8> mlir::tile(ArrayRef<AffineForOp> forOps, | 
|  | 809 | ArrayRef<uint64_t> sizes, | 
|  | 810 | AffineForOp target) { | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 811 | return tileImpl(forOps, sizes, target); | 
| Nicolas Vasilache | 62c54a2 | 2019-02-25 09:53:05 -0800 | [diff] [blame] | 812 | } | 
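A minimal usage sketch of this overload, assuming a perfectly nested 2-d affine band rooted at a hypothetical `root` op:

```cpp
// Gather the band and tile both loops by 32, sinking the intra-tile loops
// under the innermost loop; this yields the classic 4-deep tiled nest.
SmallVector<AffineForOp, 4> band;
getPerfectlyNestedLoops(band, root);
SmallVector<AffineForOp, 8> intraTileLoops =
    mlir::tile(band, /*sizes=*/{32, 32}, /*target=*/band.back());
```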
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 813 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 814 | Loops mlir::tile(ArrayRef<loop::ForOp> forOps, ArrayRef<Value> sizes, | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 815 | loop::ForOp target) { | 
|  | 816 | return tileImpl(forOps, sizes, target); | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 817 | } | 
|  | 818 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 819 | Loops mlir::tilePerfectlyNested(loop::ForOp rootForOp, ArrayRef<Value> sizes) { | 
| Kazuaki Ishizaki | 8bfedb3 | 2019-10-20 00:11:03 -0700 | [diff] [blame] | 820 | // Collect perfectly nested loops.  If more size values are provided than | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 821 | // nested loops available, truncate `sizes`. | 
| Nicolas Vasilache | cca53e8 | 2019-07-15 02:50:09 -0700 | [diff] [blame] | 822 | SmallVector<loop::ForOp, 4> forOps; | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 823 | forOps.reserve(sizes.size()); | 
|  | 824 | getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size()); | 
|  | 825 | if (forOps.size() < sizes.size()) | 
|  | 826 | sizes = sizes.take_front(forOps.size()); | 
|  | 827 |  | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 828 | return ::tile(forOps, sizes, forOps.back()); | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 829 | } | 
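And the loop-dialect counterpart takes SSA tile sizes; a sketch assuming `rootForOp` is a `loop::ForOp` heading a perfect 2-d nest:

```cpp
OpBuilder b(rootForOp);
// Tile sizes are ordinary index values here, so they could equally be
// computed at runtime instead of being constants.
Value ts = b.create<ConstantIndexOp>(rootForOp.getLoc(), 32);
Loops intraTile = mlir::tilePerfectlyNested(rootForOp, {ts, ts});
```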
|  | 830 |  | 
|  | 831 | // Build the IR that performs ceil division of a positive value by a constant: | 
|  | 832 | //    ceildiv(a, B) = divis(a + (B-1), B) | 
| Kazuaki Ishizaki | 8bfedb3 | 2019-10-20 00:11:03 -0700 | [diff] [blame] | 833 | // where divis is rounding-to-zero division. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 834 | static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, | 
|  | 835 | int64_t divisor) { | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 836 | assert(divisor > 0 && "expected positive divisor"); | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 837 | assert(dividend.getType().isIndex() && "expected index-typed value"); | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 838 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 839 | Value divisorMinusOneCst = builder.create<ConstantIndexOp>(loc, divisor - 1); | 
|  | 840 | Value divisorCst = builder.create<ConstantIndexOp>(loc, divisor); | 
|  | 841 | Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOneCst); | 
| Manuel Freiberger | 22954a0 | 2019-12-22 10:01:35 -0800 | [diff] [blame] | 842 | return builder.create<SignedDivIOp>(loc, sum, divisorCst); | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 843 | } | 
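A quick numeric check of the identity:

```cpp
// ceildiv(10, 4): (10 + (4 - 1)) / 4 == 13 / 4 == 3 with round-to-zero
// division, which matches ceil(10.0 / 4.0).
```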
|  | 844 |  | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 845 | // Build the IR that performs ceil division of a positive value by another | 
|  | 846 | // positive value: | 
|  | 847 | //    ceildiv(a, b) = divis(a + (b - 1), b) | 
|  | 848 | // where divis is rounding-to-zero division. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 849 | static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, | 
|  | 850 | Value divisor) { | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 851 | assert(dividend.getType().isIndex() && "expected index-typed value"); | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 852 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 853 | Value cstOne = builder.create<ConstantIndexOp>(loc, 1); | 
|  | 854 | Value divisorMinusOne = builder.create<SubIOp>(loc, divisor, cstOne); | 
|  | 855 | Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOne); | 
| Manuel Freiberger | 22954a0 | 2019-12-22 10:01:35 -0800 | [diff] [blame] | 856 | return builder.create<SignedDivIOp>(loc, sum, divisor); | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 857 | } | 
|  | 858 |  | 
| Nicolas Vasilache | 48a1bae | 2019-07-22 04:30:50 -0700 | [diff] [blame] | 859 | // Hoist the ops within `outer` that appear before `inner`. | 
|  | 860 | // Such ops include the ops that have been introduced by parametric tiling. | 
|  | 861 | // Ops that come from triangular loops (i.e. that belong to the program slice | 
|  | 862 | // rooted at `outer`) and ops that have side effects cannot be hoisted. | 
|  | 863 | // Return failure when any op fails to hoist. | 
|  | 864 | static LogicalResult hoistOpsBetween(loop::ForOp outer, loop::ForOp inner) { | 
|  | 865 | SetVector<Operation *> forwardSlice; | 
|  | 866 | getForwardSlice(outer.getOperation(), &forwardSlice, [&inner](Operation *op) { | 
|  | 867 | return op != inner.getOperation(); | 
|  | 868 | }); | 
|  | 869 | LogicalResult status = success(); | 
|  | 870 | SmallVector<Operation *, 8> toHoist; | 
|  | 871 | for (auto &op : outer.getBody()->getOperations()) { | 
|  | 872 | // Stop when encountering the inner loop. | 
|  | 873 | if (&op == inner.getOperation()) | 
|  | 874 | break; | 
|  | 875 | // Skip over non-hoistable ops. | 
|  | 876 | if (forwardSlice.count(&op) > 0) { | 
|  | 877 | status = failure(); | 
|  | 878 | continue; | 
|  | 879 | } | 
|  | 880 | // Skip loop::ForOp, these are not considered a failure. | 
|  | 881 | if (isa<loop::ForOp>(op)) | 
|  | 882 | continue; | 
|  | 883 | // Skip other ops with regions. | 
|  | 884 | if (op.getNumRegions() > 0) { | 
|  | 885 | status = failure(); | 
|  | 886 | continue; | 
|  | 887 | } | 
|  | 888 | // Skip if op has side effects. | 
|  | 889 | // TODO(ntv): loads to immutable memory regions are ok. | 
|  | 890 | if (!op.hasNoSideEffect()) { | 
|  | 891 | status = failure(); | 
|  | 892 | continue; | 
|  | 893 | } | 
|  | 894 | toHoist.push_back(&op); | 
|  | 895 | } | 
|  | 896 | auto *outerForOp = outer.getOperation(); | 
|  | 897 | for (auto *op : toHoist) | 
|  | 898 | op->moveBefore(outerForOp); | 
|  | 899 | return status; | 
|  | 900 | } | 
|  | 901 |  | 
|  | 902 | // Traverse the interTile and intraTile loops and try to hoist ops such that | 
|  | 903 | // bands of perfectly nested loops are isolated. | 
|  | 904 | // Return failure if either perfect interTile or perfect intraTile bands cannot | 
|  | 905 | // be formed. | 
|  | 906 | static LogicalResult tryIsolateBands(const TileLoops &tileLoops) { | 
|  | 907 | LogicalResult status = success(); | 
|  | 908 | auto &interTile = tileLoops.first; | 
|  | 909 | auto &intraTile = tileLoops.second; | 
|  | 910 | auto size = interTile.size(); | 
|  | 911 | assert(size == intraTile.size()); | 
|  | 912 | if (size <= 1) | 
|  | 913 | return success(); | 
|  | 914 | for (unsigned s = 1; s < size; ++s) | 
|  | 915 | status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s]) | 
|  | 916 | : failure(); | 
|  | 917 | for (unsigned s = 1; s < size; ++s) | 
|  | 918 | status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s]) | 
|  | 919 | : failure(); | 
|  | 920 | return status; | 
|  | 921 | } | 
|  | 922 |  | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 923 | TileLoops mlir::extractFixedOuterLoops(loop::ForOp rootForOp, | 
|  | 924 | ArrayRef<int64_t> sizes) { | 
| Kazuaki Ishizaki | ae05cf2 | 2019-12-09 09:23:15 -0800 | [diff] [blame] | 925 | // Collect perfectly nested loops.  If more size values are provided than | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 926 | // nested loops available, truncate `sizes`. | 
| Nicolas Vasilache | cca53e8 | 2019-07-15 02:50:09 -0700 | [diff] [blame] | 927 | SmallVector<loop::ForOp, 4> forOps; | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 928 | forOps.reserve(sizes.size()); | 
|  | 929 | getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size()); | 
|  | 930 | if (forOps.size() < sizes.size()) | 
|  | 931 | sizes = sizes.take_front(forOps.size()); | 
|  | 932 |  | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 933 | // Compute the tile sizes such that the i-th outer loop executes size[i] | 
|  | 934 | // iterations.  Given that the loop currently executes | 
|  | 935 | //   numIterations = ceildiv((upperBound - lowerBound), step) | 
|  | 936 | // iterations, we need to tile with size ceildiv(numIterations, size[i]). | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 937 | SmallVector<Value, 4> tileSizes; | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 938 | tileSizes.reserve(sizes.size()); | 
|  | 939 | for (unsigned i = 0, e = sizes.size(); i < e; ++i) { | 
|  | 940 | assert(sizes[i] > 0 && "expected strictly positive size for strip-mining"); | 
|  | 941 |  | 
|  | 942 | auto forOp = forOps[i]; | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 943 | OpBuilder builder(forOp); | 
|  | 944 | auto loc = forOp.getLoc(); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 945 | Value diff = | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 946 | builder.create<SubIOp>(loc, forOp.upperBound(), forOp.lowerBound()); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 947 | Value numIterations = ceilDivPositive(builder, loc, diff, forOp.step()); | 
|  | 948 | Value iterationsPerBlock = | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 949 | ceilDivPositive(builder, loc, numIterations, sizes[i]); | 
|  | 950 | tileSizes.push_back(iterationsPerBlock); | 
|  | 951 | } | 
|  | 952 |  | 
|  | 953 | // Call parametric tiling with the given sizes. | 
| Nicolas Vasilache | 5bc3447 | 2019-07-19 01:52:36 -0700 | [diff] [blame] | 954 | auto intraTile = tile(forOps, tileSizes, forOps.back()); | 
| Nicolas Vasilache | 48a1bae | 2019-07-22 04:30:50 -0700 | [diff] [blame] | 955 | TileLoops tileLoops = std::make_pair(forOps, intraTile); | 
|  | 956 |  | 
|  | 957 | // TODO(ntv, zinenko) for now we just ignore the result of band isolation. | 
|  | 958 | // In the future, mapping decisions may be impacted by the ability to | 
|  | 959 | // isolate perfectly nested bands. | 
|  | 960 | tryIsolateBands(tileLoops); | 
|  | 961 |  | 
|  | 962 | return tileLoops; | 
| Alex Zinenko | 9d03f56 | 2019-07-09 06:37:17 -0700 | [diff] [blame] | 963 | } | 
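A hedged usage sketch, assuming `rootForOp` heads a perfect 2-d `loop.for` nest; the outer band is made to execute exactly 8 and 16 iterations respectively, with the rest of the trip count folded into the intra-tile band:

```cpp
TileLoops tileLoops = mlir::extractFixedOuterLoops(rootForOp, {8, 16});
auto &interTile = tileLoops.first;  // fixed-trip-count outer band
auto &intraTile = tileLoops.second; // parametric intra-tile band
```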
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 964 |  | 
|  | 965 | // Replaces all uses of `orig` with `replacement` except if the user is listed | 
|  | 966 | // in `exceptions`. | 
|  | 967 | static void | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 968 | replaceAllUsesExcept(Value orig, Value replacement, | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 969 | const SmallPtrSetImpl<Operation *> &exceptions) { | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 970 | for (auto &use : llvm::make_early_inc_range(orig.getUses())) { | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 971 | if (exceptions.count(use.getOwner()) == 0) | 
|  | 972 | use.set(replacement); | 
|  | 973 | } | 
|  | 974 | } | 
|  | 975 |  | 
|  | 976 | // Transform a loop with a strictly positive step | 
|  | 977 | //   for %i = %lb to %ub step %s | 
|  | 978 | // into a 0-based loop with step 1 | 
|  | 979 | //   for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { | 
|  | 980 | //     %i = %ii * %s + %lb | 
|  | 981 | // Insert the induction variable remapping in the body of `inner`, which is | 
|  | 982 | // expected to be either `loop` or another loop perfectly nested under `loop`. | 
|  | 983 | // Insert the definition of new bounds immediately before `outer`, which is | 
|  | 984 | // expected to be either `loop` or its parent in the loop nest. | 
|  | 985 | static void normalizeLoop(loop::ForOp loop, loop::ForOp outer, | 
|  | 986 | loop::ForOp inner) { | 
|  | 987 | OpBuilder builder(outer); | 
|  | 988 | Location loc = loop.getLoc(); | 
|  | 989 |  | 
|  | 990 | // Check if the loop is already known to have a constant zero lower bound or | 
|  | 991 | // a constant one step. | 
|  | 992 | bool isZeroBased = false; | 
|  | 993 | if (auto lbCst = | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 994 | dyn_cast_or_null<ConstantIndexOp>(loop.lowerBound().getDefiningOp())) | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 995 | isZeroBased = lbCst.getValue() == 0; | 
|  | 996 |  | 
|  | 997 | bool isStepOne = false; | 
|  | 998 | if (auto stepCst = | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 999 | dyn_cast_or_null<ConstantIndexOp>(loop.step().getDefiningOp())) | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1000 | isStepOne = stepCst.getValue() == 1; | 
|  | 1001 |  | 
|  | 1002 | if (isZeroBased && isStepOne) | 
|  | 1003 | return; | 
|  | 1004 |  | 
|  | 1005 | // Compute the number of iterations the loop executes: ceildiv(ub - lb, step) | 
|  | 1006 | // assuming the step is strictly positive.  Update the bounds and the step | 
|  | 1007 | // of the loop to go from 0 to the number of iterations, if necessary. | 
|  | 1008 | // TODO(zinenko): introduce support for negative steps or emit dynamic asserts | 
|  | 1009 | // on step positivity, whatever gets implemented first. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1010 | Value diff = | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1011 | builder.create<SubIOp>(loc, loop.upperBound(), loop.lowerBound()); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1012 | Value numIterations = ceilDivPositive(builder, loc, diff, loop.step()); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1013 | loop.setUpperBound(numIterations); | 
|  | 1014 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1015 | Value lb = loop.lowerBound(); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1016 | if (!isZeroBased) { | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1017 | Value cst0 = builder.create<ConstantIndexOp>(loc, 0); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1018 | loop.setLowerBound(cst0); | 
|  | 1019 | } | 
|  | 1020 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1021 | Value step = loop.step(); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1022 | if (!isStepOne) { | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1023 | Value cst1 = builder.create<ConstantIndexOp>(loc, 1); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1024 | loop.setStep(cst1); | 
|  | 1025 | } | 
|  | 1026 |  | 
|  | 1027 | // Insert code computing the value of the original loop induction variable | 
|  | 1028 | // from the "normalized" one. | 
| Nicolas Vasilache | 0002e29 | 2019-07-16 12:20:15 -0700 | [diff] [blame] | 1029 | builder.setInsertionPointToStart(inner.getBody()); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1030 | Value scaled = | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1031 | isStepOne ? loop.getInductionVar() | 
|  | 1032 | : builder.create<MulIOp>(loc, loop.getInductionVar(), step); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1033 | Value shifted = | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1034 | isZeroBased ? scaled : builder.create<AddIOp>(loc, scaled, lb); | 
|  | 1035 |  | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 1036 | SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(), | 
|  | 1037 | shifted.getDefiningOp()}; | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1038 | replaceAllUsesExcept(loop.getInductionVar(), shifted, preserve); | 
|  | 1039 | } | 
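A worked instance of the rewrite, assuming %lb = 4, %ub = 20 and %s = 3 (six iterations: 4, 7, 10, 13, 16, 19):

```cpp
// ceildiv(20 - 4, 3) == 6, so the normalized loop is:
//   loop.for %ii = 0 to 6 step 1 {
//     %i = %ii * 3 + 4   // remapped IV; replaces all other uses of %i
//     ...
//   }
```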
|  | 1040 |  | 
|  | 1041 | void mlir::coalesceLoops(MutableArrayRef<loop::ForOp> loops) { | 
|  | 1042 | if (loops.size() < 2) | 
|  | 1043 | return; | 
|  | 1044 |  | 
|  | 1045 | loop::ForOp innermost = loops.back(); | 
|  | 1046 | loop::ForOp outermost = loops.front(); | 
|  | 1047 |  | 
|  | 1048 | // 1. Make sure all loops iterate from 0 to upperBound with step 1.  This | 
|  | 1049 | // allows the following code to assume upperBound is the number of iterations. | 
|  | 1050 | for (auto loop : loops) | 
|  | 1051 | normalizeLoop(loop, outermost, innermost); | 
|  | 1052 |  | 
|  | 1053 | // 2. Emit code computing the upper bound of the coalesced loop as product | 
|  | 1054 | // of the number of iterations of all loops. | 
|  | 1055 | OpBuilder builder(outermost); | 
|  | 1056 | Location loc = outermost.getLoc(); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1057 | Value upperBound = outermost.upperBound(); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1058 | for (auto loop : loops.drop_front()) | 
|  | 1059 | upperBound = builder.create<MulIOp>(loc, upperBound, loop.upperBound()); | 
|  | 1060 | outermost.setUpperBound(upperBound); | 
|  | 1061 |  | 
| Nicolas Vasilache | 0002e29 | 2019-07-16 12:20:15 -0700 | [diff] [blame] | 1062 | builder.setInsertionPointToStart(outermost.getBody()); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1063 |  | 
|  | 1064 | // 3. Remap induction variables.  For each original loop, the value of the | 
|  | 1065 | // induction variable can be obtained by dividing the induction variable of | 
|  | 1066 | // the linearized loop by the total number of iterations of the loops nested | 
|  | 1067 | // in it modulo the number of iterations in this loop (remove the values | 
|  | 1068 | // related to the outer loops): | 
|  | 1069 | //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i. | 
|  | 1070 | // Compute these iteratively from the innermost loop by creating a "running | 
|  | 1071 | // quotient" of division by the range. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1072 | Value previous = outermost.getInductionVar(); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1073 | for (unsigned i = 0, e = loops.size(); i < e; ++i) { | 
|  | 1074 | unsigned idx = loops.size() - i - 1; | 
|  | 1075 | if (i != 0) | 
| Manuel Freiberger | 22954a0 | 2019-12-22 10:01:35 -0800 | [diff] [blame] | 1076 | previous = builder.create<SignedDivIOp>(loc, previous, | 
|  | 1077 | loops[idx + 1].upperBound()); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1078 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1079 | Value iv = (i == e - 1) ? previous | 
|  | 1080 | : builder.create<SignedRemIOp>( | 
|  | 1081 | loc, previous, loops[idx].upperBound()); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1082 | replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv, | 
|  | 1083 | loops.back().region()); | 
|  | 1084 | } | 
|  | 1085 |  | 
|  | 1086 | // 4. Move the operations from the innermost loop to just above the | 
|  | 1087 | // second-outermost loop, then delete the extra terminator and that loop. | 
|  | 1088 | loop::ForOp second = loops[1]; | 
| Nicolas Vasilache | 0002e29 | 2019-07-16 12:20:15 -0700 | [diff] [blame] | 1089 | innermost.getBody()->back().erase(); | 
|  | 1090 | outermost.getBody()->getOperations().splice( | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1091 | Block::iterator(second.getOperation()), | 
| Nicolas Vasilache | 0002e29 | 2019-07-16 12:20:15 -0700 | [diff] [blame] | 1092 | innermost.getBody()->getOperations()); | 
| Alex Zinenko | fc044e8 | 2019-07-15 06:40:07 -0700 | [diff] [blame] | 1093 | second.erase(); | 
|  | 1094 | } | 
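For two normalized loops with %N and %M iterations, step 3 above works out to the following remapping (a hedged sketch of the emitted arithmetic):

```cpp
//   loop.for %iv = 0 to %N * %M step 1 {
//     %i = %iv / %M   // signed division recovers the outer IV
//     %j = %iv % %M   // signed remainder recovers the inner IV
//     ...
//   }
```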
| Nicolas Vasilache | db4cd1c | 2019-07-19 04:47:27 -0700 | [diff] [blame] | 1095 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1096 | void mlir::mapLoopToProcessorIds(loop::ForOp forOp, ArrayRef<Value> processorId, | 
|  | 1097 | ArrayRef<Value> numProcessors) { | 
| Nicolas Vasilache | db4cd1c | 2019-07-19 04:47:27 -0700 | [diff] [blame] | 1098 | assert(processorId.size() == numProcessors.size()); | 
|  | 1099 | if (processorId.empty()) | 
|  | 1100 | return; | 
|  | 1101 |  | 
|  | 1102 | OpBuilder b(forOp); | 
|  | 1103 | Location loc(forOp.getLoc()); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1104 | Value mul = processorId.front(); | 
| Nicolas Vasilache | db4cd1c | 2019-07-19 04:47:27 -0700 | [diff] [blame] | 1105 | for (unsigned i = 1, e = processorId.size(); i < e; ++i) | 
|  | 1106 | mul = b.create<AddIOp>(loc, b.create<MulIOp>(loc, mul, numProcessors[i]), | 
|  | 1107 | processorId[i]); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1108 | Value lb = b.create<AddIOp>(loc, forOp.lowerBound(), | 
|  | 1109 | b.create<MulIOp>(loc, forOp.step(), mul)); | 
| Nicolas Vasilache | db4cd1c | 2019-07-19 04:47:27 -0700 | [diff] [blame] | 1110 | forOp.setLowerBound(lb); | 
|  | 1111 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1112 | Value step = forOp.step(); | 
| River Riddle | 35807bc | 2019-12-22 21:59:55 -0800 | [diff] [blame] | 1113 | for (auto numProcs : numProcessors) | 
| Nicolas Vasilache | db4cd1c | 2019-07-19 04:47:27 -0700 | [diff] [blame] | 1114 | step = b.create<MulIOp>(loc, step, numProcs); | 
|  | 1115 | forOp.setStep(step); | 
|  | 1116 | } | 
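As a hedged example with a two-level processor grid, say GPU block and thread ids %bx and %tx with grid sizes %nbx and %ntx, the rewrite gives each processor a cyclic slice of the iteration space:

```cpp
//   %linear = %bx * %ntx + %tx            // row-major linearized id
//   new lower bound:  %lb + %step * %linear
//   new step:         %step * %nbx * %ntx
```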
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1117 |  | 
|  | 1118 | /// Given a memref region, determine the lowest depth at which transfers can be | 
|  | 1119 | /// placed for it, and return the corresponding block, start and end positions | 
|  | 1120 | /// in the block for placing incoming (read) and outgoing (write) copies | 
|  | 1121 | /// respectively. The lowest depth depends on whether the region being accessed | 
|  | 1122 | /// is hoistable with respect to one or more immediately surrounding loops. | 
|  | 1123 | static void | 
|  | 1124 | findHighestBlockForPlacement(const MemRefRegion ®ion, Block &block, | 
|  | 1125 | Block::iterator &begin, Block::iterator &end, | 
|  | 1126 | Block **copyPlacementBlock, | 
|  | 1127 | Block::iterator *copyInPlacementStart, | 
|  | 1128 | Block::iterator *copyOutPlacementStart) { | 
|  | 1129 | const auto *cst = region.getConstraints(); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1130 | SmallVector<Value, 4> symbols; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1131 | cst->getIdValues(cst->getNumDimIds(), cst->getNumDimAndSymbolIds(), &symbols); | 
|  | 1132 |  | 
|  | 1133 | SmallVector<AffineForOp, 4> enclosingFors; | 
|  | 1134 | getLoopIVs(*block.begin(), &enclosingFors); | 
|  | 1135 | // Walk up loop parents till we find an IV on which this region is | 
|  | 1136 | // symbolic/variant. | 
|  | 1137 | auto it = enclosingFors.rbegin(); | 
|  | 1138 | for (auto e = enclosingFors.rend(); it != e; ++it) { | 
|  | 1139 | // TODO(bondhugula): also need to be checking this for region symbols that | 
|  | 1140 | // aren't loop IVs, whether we are within their resp. defs' dominance scope. | 
|  | 1141 | if (llvm::is_contained(symbols, it->getInductionVar())) | 
|  | 1142 | break; | 
|  | 1143 | } | 
|  | 1144 |  | 
|  | 1145 | if (it != enclosingFors.rbegin()) { | 
|  | 1146 | auto lastInvariantIV = *std::prev(it); | 
|  | 1147 | *copyInPlacementStart = Block::iterator(lastInvariantIV.getOperation()); | 
|  | 1148 | *copyOutPlacementStart = std::next(*copyInPlacementStart); | 
|  | 1149 | *copyPlacementBlock = lastInvariantIV.getOperation()->getBlock(); | 
|  | 1150 | } else { | 
|  | 1151 | *copyInPlacementStart = begin; | 
|  | 1152 | *copyOutPlacementStart = end; | 
|  | 1153 | *copyPlacementBlock = █ | 
|  | 1154 | } | 
|  | 1155 | } | 
|  | 1156 |  | 
|  | 1157 | // Info comprising stride and number of elements transferred every stride. | 
|  | 1158 | struct StrideInfo { | 
|  | 1159 | int64_t stride; | 
|  | 1160 | int64_t numEltPerStride; | 
|  | 1161 | }; | 
|  | 1162 |  | 
|  | 1163 | /// Returns striding information for a copy/transfer of this region with | 
|  | 1164 | /// potentially multiple striding levels from outermost to innermost. For an | 
|  | 1165 | /// n-dimensional region, there can be at most n-1 levels of striding | 
|  | 1166 | /// successively nested. | 
|  | 1167 | //  TODO(bondhugula): make this work with non-identity layout maps. | 
|  | 1168 | static void getMultiLevelStrides(const MemRefRegion ®ion, | 
|  | 1169 | ArrayRef<int64_t> bufferShape, | 
|  | 1170 | SmallVectorImpl<StrideInfo> *strideInfos) { | 
|  | 1171 | if (bufferShape.size() <= 1) | 
|  | 1172 | return; | 
|  | 1173 |  | 
|  | 1174 | int64_t numEltPerStride = 1; | 
|  | 1175 | int64_t stride = 1; | 
|  | 1176 | for (int d = bufferShape.size() - 1; d >= 1; d--) { | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 1177 | int64_t dimSize = region.memref.getType().cast<MemRefType>().getDimSize(d); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1178 | stride *= dimSize; | 
|  | 1179 | numEltPerStride *= bufferShape[d]; | 
|  | 1180 | // A stride is needed only if the region has a shorter extent than the | 
|  | 1181 | // memref along the dimension *and* has an extent greater than one along the | 
|  | 1182 | // next major dimension. | 
|  | 1183 | if (bufferShape[d] < dimSize && bufferShape[d - 1] > 1) { | 
|  | 1184 | strideInfos->push_back({stride, numEltPerStride}); | 
|  | 1185 | } | 
|  | 1186 | } | 
|  | 1187 | } | 
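For instance, copying a 64x32 region out of a `memref<256x128xf32>` produces a single stride level, traced through the loop above:

```cpp
//   d = 1: stride = 128 (one full row of the original memref),
//          numEltPerStride = 32 (contiguous elements moved per stride);
//   bufferShape[1] (32) < dimSize (128) and bufferShape[0] (64) > 1,
//   so strideInfos == [{128, 32}].
```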
|  | 1188 |  | 
|  | 1189 | /// Generates a point-wise copy from/to `memref' to/from `fastMemRef' and | 
|  | 1190 | /// returns the outermost AffineForOp of the copy loop nest. `memIndicesStart' | 
|  | 1191 | /// holds the lower coordinates of the region in the original memref to copy | 
|  | 1192 | /// in/out. If `copyOut' is true, generates a copy-out; otherwise a copy-in. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1193 | static AffineForOp generatePointWiseCopy(Location loc, Value memref, | 
|  | 1194 | Value fastMemRef, | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1195 | AffineMap memAffineMap, | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1196 | ArrayRef<Value> memIndicesStart, | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1197 | ArrayRef<int64_t> fastBufferShape, | 
|  | 1198 | bool isCopyOut, OpBuilder b) { | 
|  | 1199 | assert(!memIndicesStart.empty() && "expected at least a 1-d memref"); | 
|  | 1200 |  | 
|  | 1201 | // The copy-in nest is generated as follows, shown here for a 2-d region: | 
|  | 1202 | // for x = ... | 
|  | 1203 | //   for y = ... | 
|  | 1204 | //     fast_buf[x][y] = buf[mem_x + x][mem_y + y] | 
|  | 1205 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1206 | SmallVector<Value, 4> fastBufIndices, memIndices; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1207 | AffineForOp copyNestRoot; | 
|  | 1208 | for (unsigned d = 0, e = fastBufferShape.size(); d < e; ++d) { | 
|  | 1209 | auto forOp = b.create<AffineForOp>(loc, 0, fastBufferShape[d]); | 
|  | 1210 | if (d == 0) | 
|  | 1211 | copyNestRoot = forOp; | 
|  | 1212 | b = forOp.getBodyBuilder(); | 
|  | 1213 | fastBufIndices.push_back(forOp.getInductionVar()); | 
|  | 1214 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1215 | Value memBase = | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1216 | (memAffineMap == b.getMultiDimIdentityMap(memAffineMap.getNumDims())) | 
|  | 1217 | ? memIndicesStart[d] | 
|  | 1218 | : b.create<AffineApplyOp>( | 
|  | 1219 | loc, | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 1220 | AffineMap::get(memAffineMap.getNumDims(), | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1221 | memAffineMap.getNumSymbols(), | 
|  | 1222 | memAffineMap.getResult(d)), | 
|  | 1223 | memIndicesStart); | 
|  | 1224 |  | 
|  | 1225 | // Construct the subscript for the slow memref being copied. | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1226 | auto memIndex = b.create<AffineApplyOp>( | 
|  | 1227 | loc, | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 1228 | AffineMap::get(2, 0, b.getAffineDimExpr(0) + b.getAffineDimExpr(1)), | 
| Uday Bondhugula | a63f6e0 | 2019-12-09 06:26:05 -0800 | [diff] [blame] | 1229 | ValueRange({memBase, forOp.getInductionVar()})); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1230 | memIndices.push_back(memIndex); | 
|  | 1231 | } | 
|  | 1232 |  | 
|  | 1233 | if (!isCopyOut) { | 
|  | 1234 | // Copy in. | 
|  | 1235 | auto load = b.create<AffineLoadOp>(loc, memref, memIndices); | 
|  | 1236 | b.create<AffineStoreOp>(loc, load, fastMemRef, fastBufIndices); | 
|  | 1237 | return copyNestRoot; | 
|  | 1238 | } | 
|  | 1239 |  | 
|  | 1240 | // Copy out. | 
|  | 1241 | auto load = b.create<AffineLoadOp>(loc, fastMemRef, fastBufIndices); | 
|  | 1242 | b.create<AffineStoreOp>(loc, load, memref, memIndices); | 
|  | 1243 | return copyNestRoot; | 
|  | 1244 | } | 
|  | 1245 |  | 
|  | 1246 | static InFlightDiagnostic LLVM_ATTRIBUTE_UNUSED | 
|  | 1247 | emitRemarkForBlock(Block &block) { | 
|  | 1248 | return block.getParentOp()->emitRemark(); | 
|  | 1249 | } | 
|  | 1250 |  | 
|  | 1251 | /// Creates a buffer in the faster memory space for the specified memref region; | 
|  | 1252 | /// generates a copy from the lower memory space to this one, and replaces all | 
|  | 1253 | /// loads/stores in the block range [`begin', `end') of `block' to load/store | 
|  | 1254 | /// from that buffer. Returns failure if copies could not be generated due to | 
|  | 1255 | /// yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` | 
|  | 1256 | /// in copyPlacementBlock specify the insertion points where the incoming copies | 
|  | 1257 | /// and outgoing copies, respectively, should be inserted (the insertion happens | 
|  | 1258 | /// right before the insertion point). Since `begin` can itself be invalidated | 
|  | 1259 | /// due to the memref rewriting done from this method, the output argument | 
|  | 1260 | /// `nBegin` is set to its replacement (set to `begin` if no invalidation | 
|  | 1261 | /// happens). Since outgoing copies could have been inserted at `end`, the | 
|  | 1262 | /// output argument `nEnd` is set to the new end. `sizeInBytes` is set to the | 
|  | 1263 | /// size of the fast buffer allocated. | 
|  | 1264 | static LogicalResult generateCopy( | 
|  | 1265 | const MemRefRegion ®ion, Block *block, Block::iterator begin, | 
|  | 1266 | Block::iterator end, Block *copyPlacementBlock, | 
|  | 1267 | Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart, | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1268 | AffineCopyOptions copyOptions, DenseMap<Value, Value> &fastBufferMap, | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1269 | DenseSet<Operation *> ©Nests, uint64_t *sizeInBytes, | 
|  | 1270 | Block::iterator *nBegin, Block::iterator *nEnd) { | 
|  | 1271 | *nBegin = begin; | 
|  | 1272 | *nEnd = end; | 
|  | 1273 |  | 
|  | 1274 | FuncOp f = begin->getParentOfType<FuncOp>(); | 
|  | 1275 | OpBuilder topBuilder(f.getBody()); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1276 | Value zeroIndex = topBuilder.create<ConstantIndexOp>(f.getLoc(), 0); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1277 |  | 
|  | 1278 | if (begin == end) | 
|  | 1279 | return success(); | 
|  | 1280 |  | 
|  | 1281 | // Whether the copy-out point is at the end of the block in which we are | 
|  | 1282 | // doing explicit copying. | 
|  | 1283 | bool isCopyOutAtEndOfBlock = (end == copyOutPlacementStart); | 
|  | 1284 |  | 
|  | 1285 | // Copies for read regions are going to be inserted at 'begin'. | 
|  | 1286 | OpBuilder prologue(copyPlacementBlock, copyInPlacementStart); | 
|  | 1287 | // Copies for write regions are going to be inserted at 'end'. | 
|  | 1288 | OpBuilder epilogue(copyPlacementBlock, copyOutPlacementStart); | 
|  | 1289 | OpBuilder &b = region.isWrite() ? epilogue : prologue; | 
|  | 1290 |  | 
|  | 1291 | // Builder to create constants at the top level. | 
|  | 1292 | auto func = copyPlacementBlock->getParent()->getParentOfType<FuncOp>(); | 
|  | 1293 | OpBuilder top(func.getBody()); | 
|  | 1294 |  | 
|  | 1295 | auto loc = region.loc; | 
| River Riddle | 35807bc | 2019-12-22 21:59:55 -0800 | [diff] [blame] | 1296 | auto memref = region.memref; | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 1297 | auto memRefType = memref.getType().cast<MemRefType>(); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1298 |  | 
|  | 1299 | auto layoutMaps = memRefType.getAffineMaps(); | 
|  | 1300 | if (layoutMaps.size() > 1 || | 
|  | 1301 | (layoutMaps.size() == 1 && !layoutMaps[0].isIdentity())) { | 
|  | 1302 | LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n"); | 
|  | 1303 | return failure(); | 
|  | 1304 | } | 
|  | 1305 |  | 
|  | 1306 | // Indices to use for the copying. | 
|  | 1307 | // Indices for the original memref being copied from/to. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1308 | SmallVector<Value, 4> memIndices; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1309 | // Indices for the faster buffer being copied into/from. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1310 | SmallVector<Value, 4> bufIndices; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1311 |  | 
|  | 1312 | unsigned rank = memRefType.getRank(); | 
|  | 1313 | SmallVector<int64_t, 4> fastBufferShape; | 
|  | 1314 |  | 
|  | 1315 | // Compute the extents of the buffer. | 
|  | 1316 | std::vector<SmallVector<int64_t, 4>> lbs; | 
|  | 1317 | SmallVector<int64_t, 8> lbDivisors; | 
|  | 1318 | lbs.reserve(rank); | 
|  | 1319 | Optional<int64_t> numElements = region.getConstantBoundingSizeAndShape( | 
|  | 1320 | &fastBufferShape, &lbs, &lbDivisors); | 
|  | 1321 | if (!numElements.hasValue()) { | 
|  | 1322 | LLVM_DEBUG(llvm::dbgs() << "Non-constant region size not supported\n"); | 
|  | 1323 | return failure(); | 
|  | 1324 | } | 
|  | 1325 |  | 
|  | 1326 | if (numElements.getValue() == 0) { | 
|  | 1327 | LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n"); | 
|  | 1328 | *sizeInBytes = 0; | 
|  | 1329 | return success(); | 
|  | 1330 | } | 
|  | 1331 |  | 
|  | 1332 | const FlatAffineConstraints *cst = region.getConstraints(); | 
| Kazuaki Ishizaki | 8bfedb3 | 2019-10-20 00:11:03 -0700 | [diff] [blame] | 1333 | // 'regionSymbols' holds values that this memory region is symbolic/parametric | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1334 | // on; these typically include loop IVs surrounding the level at which the | 
|  | 1335 | // copy generation is being done or other valid symbols in MLIR. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1336 | SmallVector<Value, 8> regionSymbols; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1337 | cst->getIdValues(rank, cst->getNumIds(), ®ionSymbols); | 
|  | 1338 |  | 
|  | 1339 | // Construct the index expressions for the fast memory buffer. The index | 
|  | 1340 | // expression for a particular dimension of the fast buffer is obtained by | 
|  | 1341 | // subtracting out the lower bound on the original memref's data region | 
|  | 1342 | // along the corresponding dimension. | 
|  | 1343 |  | 
|  | 1344 | // Index start offsets for faster memory buffer relative to the original. | 
|  | 1345 | SmallVector<AffineExpr, 4> offsets; | 
|  | 1346 | offsets.reserve(rank); | 
|  | 1347 | for (unsigned d = 0; d < rank; d++) { | 
|  | 1348 | assert(lbs[d].size() == cst->getNumCols() - rank && "incorrect bound size"); | 
|  | 1349 |  | 
|  | 1350 | AffineExpr offset = top.getAffineConstantExpr(0); | 
|  | 1351 | for (unsigned j = 0, e = cst->getNumCols() - rank - 1; j < e; j++) { | 
|  | 1352 | offset = offset + lbs[d][j] * top.getAffineDimExpr(j); | 
|  | 1353 | } | 
|  | 1354 | assert(lbDivisors[d] > 0); | 
|  | 1355 | offset = | 
|  | 1356 | (offset + lbs[d][cst->getNumCols() - 1 - rank]).floorDiv(lbDivisors[d]); | 
|  | 1357 |  | 
|  | 1358 | // Set copy start location for this dimension in the lower memory space | 
|  | 1359 | // memref. | 
|  | 1360 | if (auto caf = offset.dyn_cast<AffineConstantExpr>()) { | 
|  | 1361 | auto indexVal = caf.getValue(); | 
|  | 1362 | if (indexVal == 0) { | 
|  | 1363 | memIndices.push_back(zeroIndex); | 
|  | 1364 | } else { | 
|  | 1365 | memIndices.push_back( | 
|  | 1366 | top.create<ConstantIndexOp>(loc, indexVal).getResult()); | 
|  | 1367 | } | 
|  | 1368 | } else { | 
|  | 1369 | // The coordinate for the start location is just the lower bound along the | 
|  | 1370 | // corresponding dimension on the memory region (stored in 'offset'). | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 1371 | auto map = AffineMap::get( | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1372 | cst->getNumDimIds() + cst->getNumSymbolIds() - rank, 0, offset); | 
|  | 1373 | memIndices.push_back(b.create<AffineApplyOp>(loc, map, regionSymbols)); | 
|  | 1374 | } | 
|  | 1375 | // The fast buffer is copied into at location zero; addressing is relative. | 
|  | 1376 | bufIndices.push_back(zeroIndex); | 
|  | 1377 |  | 
|  | 1378 | // Record the offsets since they are needed to remap the memory accesses of | 
|  | 1379 | // the original memref further below. | 
|  | 1380 | offsets.push_back(offset); | 
|  | 1381 | } | 
|  | 1382 |  | 
|  | 1383 | // The faster memory space buffer. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1384 | Value fastMemRef; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1385 |  | 
|  | 1386 | // Check if a buffer was already created. | 
|  | 1387 | bool existingBuf = fastBufferMap.count(memref) > 0; | 
|  | 1388 | if (!existingBuf) { | 
|  | 1389 | AffineMap fastBufferLayout = b.getMultiDimIdentityMap(rank); | 
|  | 1390 | auto fastMemRefType = | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 1391 | MemRefType::get(fastBufferShape, memRefType.getElementType(), | 
|  | 1392 | fastBufferLayout, copyOptions.fastMemorySpace); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1393 |  | 
|  | 1394 | // Create the fast memory space buffer just before the 'affine.for' | 
|  | 1395 | // operation. | 
|  | 1396 | fastMemRef = prologue.create<AllocOp>(loc, fastMemRefType).getResult(); | 
|  | 1397 | // Record it. | 
|  | 1398 | fastBufferMap[memref] = fastMemRef; | 
|  | 1399 | // fastMemRefType is a constant shaped memref. | 
|  | 1400 | *sizeInBytes = getMemRefSizeInBytes(fastMemRefType).getValue(); | 
|  | 1401 | LLVM_DEBUG(emitRemarkForBlock(*block) | 
|  | 1402 | << "Creating fast buffer of type " << fastMemRefType | 
|  | 1403 | << " and size " << llvm::divideCeil(*sizeInBytes, 1024) | 
|  | 1404 | << " KiB\n"); | 
|  | 1405 | } else { | 
|  | 1406 | // Reuse the one already created. | 
|  | 1407 | fastMemRef = fastBufferMap[memref]; | 
|  | 1408 | *sizeInBytes = 0; | 
|  | 1409 | } | 
|  | 1410 |  | 
|  | 1411 | auto numElementsSSA = | 
|  | 1412 | top.create<ConstantIndexOp>(loc, numElements.getValue()); | 
|  | 1413 |  | 
|  | 1414 | SmallVector<StrideInfo, 4> strideInfos; | 
|  | 1415 | getMultiLevelStrides(region, fastBufferShape, &strideInfos); | 
|  | 1416 |  | 
|  | 1417 | // TODO(bondhugula): use all stride levels once DmaStartOp is extended for | 
|  | 1418 | // multi-level strides. | 
|  | 1419 | if (strideInfos.size() > 1) { | 
|  | 1420 | LLVM_DEBUG(llvm::dbgs() << "Only up to one level of stride supported\n"); | 
|  | 1421 | return failure(); | 
|  | 1422 | } | 
|  | 1423 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1424 | Value stride = nullptr; | 
|  | 1425 | Value numEltPerStride = nullptr; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1426 | if (!strideInfos.empty()) { | 
|  | 1427 | stride = top.create<ConstantIndexOp>(loc, strideInfos[0].stride); | 
|  | 1428 | numEltPerStride = | 
|  | 1429 | top.create<ConstantIndexOp>(loc, strideInfos[0].numEltPerStride); | 
|  | 1430 | } | 
|  | 1431 |  | 
|  | 1432 | // Record the last operation where we want the memref replacement to end. We | 
|  | 1433 | // later do the memref replacement only in [begin, postDomFilter] so | 
|  | 1434 | // that the original memrefs used in the data movement code themselves don't | 
|  | 1435 | // get replaced. | 
|  | 1436 | auto postDomFilter = std::prev(end); | 
|  | 1437 |  | 
|  | 1438 | // Create fully composed affine maps for each memref. | 
|  | 1439 | auto memAffineMap = b.getMultiDimIdentityMap(memIndices.size()); | 
|  | 1440 | fullyComposeAffineMapAndOperands(&memAffineMap, &memIndices); | 
|  | 1441 | auto bufAffineMap = b.getMultiDimIdentityMap(bufIndices.size()); | 
|  | 1442 | fullyComposeAffineMapAndOperands(&bufAffineMap, &bufIndices); | 
|  | 1443 |  | 
|  | 1444 | if (!copyOptions.generateDma) { | 
|  | 1445 | // Point-wise copy generation. | 
|  | 1446 | auto copyNest = generatePointWiseCopy(loc, memref, fastMemRef, memAffineMap, | 
|  | 1447 | memIndices, fastBufferShape, | 
|  | 1448 | /*isCopyOut=*/region.isWrite(), b); | 
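|  |  | // The generated nest has roughly the following form (a sketch for a 2-d | 
|  |  | // region with hypothetical names/shapes; the copy-out case swaps the | 
|  |  | // load/store direction): | 
|  |  | //   affine.for %ii = 0 to 64 { | 
|  |  | //     affine.for %jj = 0 to 64 { | 
|  |  | //       %v = affine.load %A[%iT + %ii, %jT + %jj] : memref<1024x1024xf32> | 
|  |  | //       affine.store %v, %Abuf[%ii, %jj] : memref<64x64xf32, 1> | 
|  |  | //     } | 
|  |  | //   } | 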
|  | 1449 |  | 
|  | 1450 | // Record this nest so that it's skipped when generating further copies. | 
|  | 1451 | copyNests.insert(copyNest); | 
|  | 1452 |  | 
|  | 1453 | // Since new ops are being appended (for copy-outs), adjust the end to | 
|  | 1454 | // mark the end of the block range being processed, if necessary. | 
|  | 1455 | if (region.isWrite() && isCopyOutAtEndOfBlock) | 
|  | 1456 | *nEnd = Block::iterator(copyNest.getOperation()); | 
|  | 1457 | } else { | 
|  | 1458 | // DMA generation. | 
|  | 1459 | // Create a tag (single element 1-d memref) for the DMA. | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 1460 | auto tagMemRefType = MemRefType::get({1}, top.getIntegerType(32), {}, | 
|  | 1461 | copyOptions.tagMemorySpace); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1462 | auto tagMemRef = prologue.create<AllocOp>(loc, tagMemRefType); | 
|  | 1463 |  | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1464 | SmallVector<Value, 4> tagIndices({zeroIndex}); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1465 | auto tagAffineMap = b.getMultiDimIdentityMap(tagIndices.size()); | 
|  | 1466 | fullyComposeAffineMapAndOperands(&tagAffineMap, &tagIndices); | 
|  | 1467 | if (!region.isWrite()) { | 
|  | 1468 | // DMA non-blocking read from original buffer to fast buffer. | 
|  | 1469 | b.create<AffineDmaStartOp>(loc, memref, memAffineMap, memIndices, | 
|  | 1470 | fastMemRef, bufAffineMap, bufIndices, | 
|  | 1471 | tagMemRef, tagAffineMap, tagIndices, | 
|  | 1472 | numElementsSSA, stride, numEltPerStride); | 
|  | 1473 | } else { | 
|  | 1474 | // DMA non-blocking write from fast buffer to the original memref. | 
|  | 1475 | auto op = b.create<AffineDmaStartOp>( | 
|  | 1476 | loc, fastMemRef, bufAffineMap, bufIndices, memref, memAffineMap, | 
|  | 1477 | memIndices, tagMemRef, tagAffineMap, tagIndices, numElementsSSA, | 
|  | 1478 | stride, numEltPerStride); | 
|  | 1479 | // Since new ops may be appended at 'end' (for outgoing DMAs), adjust the | 
|  | 1480 | // end to mark end of block range being processed. | 
|  | 1481 | if (isCopyOutAtEndOfBlock) | 
|  | 1482 | *nEnd = Block::iterator(op.getOperation()); | 
|  | 1483 | } | 
|  | 1484 |  | 
|  | 1485 | // Matching DMA wait to block on completion; tag always has a 0 index. | 
|  | 1486 | b.create<AffineDmaWaitOp>(loc, tagMemRef, tagAffineMap, zeroIndex, | 
|  | 1487 | numElementsSSA); | 
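|  |  | // Sketch of the resulting IR for the incoming (read) case, with | 
|  |  | // hypothetical names and shapes: | 
|  |  | //   affine.dma_start %A[%iT, %jT], %Abuf[%c0, %c0], %tag[%c0], %num | 
|  |  | //       : memref<1024x1024xf32>, memref<64x64xf32, 1>, memref<1xi32, 2> | 
|  |  | //   affine.dma_wait %tag[%c0], %num : memref<1xi32, 2> | 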
|  | 1488 |  | 
|  | 1489 | // Generate dealloc for the tag. | 
|  | 1490 | auto tagDeallocOp = epilogue.create<DeallocOp>(loc, tagMemRef); | 
|  | 1491 | if (*nEnd == end && isCopyOutAtEndOfBlock) | 
|  | 1492 | // Since new ops are being appended (for outgoing DMAs), adjust the end to | 
|  | 1493 | // mark the end of the original range. | 
|  | 1494 | *nEnd = Block::iterator(tagDeallocOp.getOperation()); | 
|  | 1495 | } | 
|  | 1496 |  | 
|  | 1497 | // Generate dealloc for the buffer. | 
|  | 1498 | if (!existingBuf) { | 
|  | 1499 | auto bufDeallocOp = epilogue.create<DeallocOp>(loc, fastMemRef); | 
|  | 1500 | // When generating pointwise copies, `nEnd' has to be set to the dealloc op | 
|  | 1501 | // on the fast buffer (since it marks the new end insertion point). | 
|  | 1502 | if (!copyOptions.generateDma && *nEnd == end && isCopyOutAtEndOfBlock) | 
|  | 1503 | *nEnd = Block::iterator(bufDeallocOp.getOperation()); | 
|  | 1504 | } | 
|  | 1505 |  | 
|  | 1506 | // Replace all uses of the old memref with the faster one while remapping | 
|  | 1507 | // access indices (subtracting out lower bound offsets for each dimension). | 
|  | 1508 | // Ex: to replace load %A[%i, %j] with load %Abuf[%i - %iT, %j - %jT], | 
|  | 1509 | // index remap will be (%i, %j) -> (%i - %iT, %j - %jT), | 
|  | 1510 | // i.e., affine.apply (d0, d1, d2, d3) -> (d2-d0, d3-d1) (%iT, %jT, %i, %j), | 
|  | 1511 | // and (%iT, %jT) will be the 'extraOperands' for 'replaceAllMemRefUsesWith'. | 
|  | 1512 | // d2, d3 correspond to the original indices (%i, %j). | 
|  | 1513 | SmallVector<AffineExpr, 4> remapExprs; | 
|  | 1514 | remapExprs.reserve(rank); | 
|  | 1515 | for (unsigned i = 0; i < rank; i++) { | 
|  | 1516 | // The starting operands of indexRemap will be regionSymbols (the symbols on | 
|  | 1517 | // which the memref region is parametric); then those corresponding to | 
|  | 1518 | // the memref's original indices follow. | 
|  | 1519 | auto dimExpr = b.getAffineDimExpr(regionSymbols.size() + i); | 
|  | 1520 | remapExprs.push_back(dimExpr - offsets[i]); | 
|  | 1521 | } | 
| River Riddle | 2acc220 | 2019-10-17 20:08:01 -0700 | [diff] [blame] | 1522 | auto indexRemap = AffineMap::get(regionSymbols.size() + rank, 0, remapExprs); | 
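|  |  | // For the 2-d example above (rank 2, two region symbols), this yields the | 
|  |  | // map (d0, d1, d2, d3) -> (d2 - d0, d3 - d1) with four dims and no symbols. | 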
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1523 |  | 
|  | 1524 | // Record the begin since it may be invalidated by memref replacement. | 
|  | 1525 | Block::iterator prevOfBegin; | 
|  | 1526 | bool isBeginAtStartOfBlock = (begin == block->begin()); | 
|  | 1527 | if (!isBeginAtStartOfBlock) | 
|  | 1528 | prevOfBegin = std::prev(begin); | 
|  | 1529 |  | 
|  | 1530 | // *Only* those uses within the range [begin, end) of 'block' are replaced. | 
|  | 1531 | replaceAllMemRefUsesWith(memref, fastMemRef, | 
|  | 1532 | /*extraIndices=*/{}, indexRemap, | 
|  | 1533 | /*extraOperands=*/regionSymbols, | 
| Uday Bondhugula | 727a50a | 2019-09-18 11:25:33 -0700 | [diff] [blame] | 1534 | /*symbolOperands=*/{}, | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1535 | /*domInstFilter=*/&*begin, | 
|  | 1536 | /*postDomInstFilter=*/&*postDomFilter); | 
|  | 1537 |  | 
|  | 1538 | *nBegin = isBeginAtStartOfBlock ? block->begin() : std::next(prevOfBegin); | 
|  | 1539 |  | 
|  | 1540 | return success(); | 
|  | 1541 | } | 
|  | 1542 |  | 
|  | 1543 | /// Construct the memref region to just include the entire memref. Returns false | 
|  | 1544 | /// for dynamically shaped memrefs for now. `numParamLoopIVs` is the number of | 
|  | 1545 | /// enclosing loop IVs of opInst (starting from the outermost) that the region | 
|  | 1546 | /// is parametric on. | 
|  | 1547 | static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs, | 
|  | 1548 | MemRefRegion *region) { | 
|  | 1549 | unsigned rank; | 
|  | 1550 | if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) { | 
|  | 1551 | rank = loadOp.getMemRefType().getRank(); | 
|  | 1552 | region->memref = loadOp.getMemRef(); | 
|  | 1553 | region->setWrite(false); | 
|  | 1554 | } else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) { | 
|  | 1555 | rank = storeOp.getMemRefType().getRank(); | 
|  | 1556 | region->memref = storeOp.getMemRef(); | 
|  | 1557 | region->setWrite(true); | 
|  | 1558 | } else { | 
|  | 1559 | assert(false && "expected load or store op"); | 
|  | 1560 | return false; | 
|  | 1561 | } | 
| River Riddle | 2bdf33c | 2020-01-11 08:54:04 -0800 | [diff] [blame] | 1562 | auto memRefType = region->memref.getType().cast<MemRefType>(); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1563 | if (!memRefType.hasStaticShape()) | 
|  | 1564 | return false; | 
|  | 1565 |  | 
|  | 1566 | auto *regionCst = region->getConstraints(); | 
|  | 1567 |  | 
|  | 1568 | // Just get the first 'numParamLoopIVs' IVs, which the memref region is | 
|  | 1569 | // parametric on. | 
|  | 1570 | SmallVector<AffineForOp, 4> ivs; | 
|  | 1571 | getLoopIVs(*opInst, &ivs); | 
|  | 1572 | ivs.resize(numParamLoopIVs); | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1573 | SmallVector<Value, 4> symbols; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1574 | extractForInductionVars(ivs, &symbols); | 
|  | 1575 | regionCst->reset(rank, numParamLoopIVs, 0); | 
|  | 1576 | regionCst->setIdValues(rank, rank + numParamLoopIVs, symbols); | 
|  | 1577 |  | 
|  | 1578 | // Memref dim sizes provide the bounds. | 
|  | 1579 | for (unsigned d = 0; d < rank; d++) { | 
|  | 1580 | auto dimSize = memRefType.getDimSize(d); | 
|  | 1581 | assert(dimSize > 0 && "filtered dynamic shapes above"); | 
|  | 1582 | regionCst->addConstantLowerBound(d, 0); | 
|  | 1583 | regionCst->addConstantUpperBound(d, dimSize - 1); | 
|  | 1584 | } | 
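|  |  | // E.g. (illustrative): for a memref<64x128xf32> with numParamLoopIVs = 0, | 
|  |  | // the constraints added above are 0 <= d0 <= 63 and 0 <= d1 <= 127. | 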
|  | 1585 | return true; | 
|  | 1586 | } | 
|  | 1587 |  | 
|  | 1588 | /// Generates copies for a contiguous sequence of operations in `block` in the | 
|  | 1589 | /// iterator range [`begin', `end'), where `end' can't be past the terminator of | 
|  | 1590 | /// the block (since additional operations are potentially inserted right before | 
|  | 1591 | /// `end'). Returns the total size of the fast buffers used. | 
|  | 1592 | //  Since we generate allocs and deallocs for all fast buffers (before and | 
|  | 1593 | //  after the range of operations resp.), all of the fast memory capacity is | 
|  | 1594 | //  assumed to be available for processing this block range. | 
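|  |  | //  A minimal usage sketch (assumes the AffineCopyOptions field order at this | 
|  |  | //  revision; 'forOp' is a hypothetical affine.for whose body is processed): | 
|  |  | //    AffineCopyOptions options = {/*generateDma=*/false, | 
|  |  | //                                 /*slowMemorySpace=*/0, | 
|  |  | //                                 /*fastMemorySpace=*/1, | 
|  |  | //                                 /*tagMemorySpace=*/0, | 
|  |  | //                                 /*fastMemCapacityBytes=*/32 * 1024}; | 
|  |  | //    DenseSet<Operation *> copyNests; | 
|  |  | //    Block *body = forOp.getBody(); | 
|  |  | //    uint64_t totalSize = affineDataCopyGenerate( | 
|  |  | //        body->begin(), std::prev(body->end()), options, copyNests); | 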
|  | 1595 | uint64_t mlir::affineDataCopyGenerate(Block::iterator begin, | 
|  | 1596 | Block::iterator end, | 
|  | 1597 | const AffineCopyOptions &copyOptions, | 
|  | 1598 | DenseSet<Operation *> &copyNests) { | 
|  | 1599 | if (begin == end) | 
|  | 1600 | return 0; | 
|  | 1601 |  | 
|  | 1602 | assert(begin->getBlock() == std::prev(end)->getBlock() && | 
|  | 1603 | "Inconsistent block begin/end args"); | 
|  | 1604 | assert(end != end->getBlock()->end() && "end can't be the block terminator"); | 
|  | 1605 |  | 
|  | 1606 | Block *block = begin->getBlock(); | 
|  | 1607 |  | 
|  | 1608 | // Copies will be generated for this depth, i.e., symbolic in all loops | 
|  | 1609 | // surrounding this block range. | 
|  | 1610 | unsigned copyDepth = getNestingDepth(*begin); | 
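|  |  | // E.g., for operations nested inside two surrounding 'affine.for' ops, | 
|  |  | // copyDepth is 2: regions and buffers are parametric on those two IVs. | 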
|  | 1611 |  | 
|  | 1612 | LLVM_DEBUG(llvm::dbgs() << "Generating copies at depth " << copyDepth | 
|  | 1613 | << "\n"); | 
|  | 1614 | LLVM_DEBUG(llvm::dbgs() << "from begin: " << *begin << "\n"); | 
|  | 1615 | LLVM_DEBUG(llvm::dbgs() << "to inclusive end: " << *std::prev(end) << "\n"); | 
|  | 1616 |  | 
|  | 1617 | // List of memory regions to copy for. We need a map vector to have a | 
|  | 1618 | // guaranteed iteration order to write test cases. CHECK-DAG doesn't help here | 
|  | 1619 | // since the allocs, for example, are identical except for the SSA id. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1620 | SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> readRegions; | 
|  | 1621 | SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> writeRegions; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1622 |  | 
|  | 1623 | // Map from original memref's to the fast buffers that their accesses are | 
|  | 1624 | // replaced with. | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1625 | DenseMap<Value, Value> fastBufferMap; | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1626 |  | 
|  | 1627 | // To check for errors when walking the block. | 
|  | 1628 | bool error = false; | 
|  | 1629 |  | 
|  | 1630 | // Walk this range of operations to gather all memory regions. | 
|  | 1631 | block->walk(begin, end, [&](Operation *opInst) { | 
|  | 1632 | // Gather regions to allocate to buffers in faster memory space. | 
|  | 1633 | if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) { | 
|  | 1634 | if ((loadOp.getMemRefType().getMemorySpace() != | 
|  | 1635 | copyOptions.slowMemorySpace)) | 
|  | 1636 | return; | 
|  | 1637 | } else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) { | 
|  | 1638 | if (storeOp.getMemRefType().getMemorySpace() != | 
|  | 1639 | copyOptions.slowMemorySpace) | 
|  | 1640 | return; | 
|  | 1641 | } else { | 
|  | 1642 | // Neither load nor a store op. | 
|  | 1643 | return; | 
|  | 1644 | } | 
|  | 1645 |  | 
|  | 1646 | // Compute the MemRefRegion accessed. | 
|  | 1647 | auto region = std::make_unique<MemRefRegion>(opInst->getLoc()); | 
|  | 1648 | if (failed(region->compute(opInst, copyDepth))) { | 
|  | 1649 | LLVM_DEBUG(llvm::dbgs() | 
|  | 1650 | << "Error obtaining memory region: semi-affine maps?\n"); | 
|  | 1651 | LLVM_DEBUG(llvm::dbgs() << "over-approximating to the entire memref\n"); | 
|  | 1652 | if (!getFullMemRefAsRegion(opInst, copyDepth, region.get())) { | 
|  | 1653 | LLVM_DEBUG( | 
| MLIR Team | 1c73be7 | 2019-09-18 07:44:39 -0700 | [diff] [blame] | 1654 | opInst->emitError("non-constant memref sizes not yet supported")); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1655 | error = true; | 
|  | 1656 | return; | 
|  | 1657 | } | 
|  | 1658 | } | 
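|  |  | // E.g. (illustrative): an access %A[%i, %j], with %i and %j ranging over | 
|  |  | // [0, 64) and [0, 128) in loops below 'copyDepth', yields the rectangular | 
|  |  | // region {0 <= d0 <= 63, 0 <= d1 <= 127} on %A. | 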
|  | 1659 |  | 
|  | 1660 | // Each memref has a single buffer associated with it irrespective of how | 
|  | 1661 | // many loads and stores happen on it. | 
|  | 1662 | // TODO(bondhugula): in the future, when regions don't intersect and satisfy | 
|  | 1663 | // other properties (based on load/store regions), we could consider | 
|  | 1664 | // multiple buffers per memref. | 
|  | 1665 |  | 
|  | 1666 | // Add to the appropriate region if it's not already in it, or take a | 
|  | 1667 | // bounding box union with the existing one if it's already in there. | 
|  | 1668 | // Note that a memref may have both read and write regions - so update the | 
|  | 1669 | // region in the other list if one exists (write in case of read and vice | 
|  | 1670 | // versa) since there is a single bounding box for a memref across all reads | 
|  | 1671 | // and writes that happen on it. | 
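|  |  | // E.g. (illustrative): a read region [0, 15] x [0, 31] and a write region | 
|  |  | // [8, 23] x [16, 47] on the same memref are merged into the bounding box | 
|  |  | // [0, 23] x [0, 47]. | 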
|  | 1672 |  | 
|  | 1673 | // Attempts to update; returns true if 'region' exists in targetRegions. | 
|  | 1674 | auto updateRegion = | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1675 | [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1676 | &targetRegions) { | 
|  | 1677 | auto it = targetRegions.find(region->memref); | 
|  | 1678 | if (it == targetRegions.end()) | 
|  | 1679 | return false; | 
|  | 1680 |  | 
|  | 1681 | // Perform a union with the existing region. | 
|  | 1682 | if (failed(it->second->unionBoundingBox(*region))) { | 
|  | 1683 | LLVM_DEBUG(llvm::dbgs() | 
|  | 1684 | << "Memory region bounding box failed; " | 
|  | 1685 | "over-approximating to the entire memref\n"); | 
|  | 1686 | // If the union fails, we will overapproximate. | 
|  | 1687 | if (!getFullMemRefAsRegion(opInst, copyDepth, region.get())) { | 
|  | 1688 | LLVM_DEBUG(opInst->emitError( | 
| MLIR Team | 1c73be7 | 2019-09-18 07:44:39 -0700 | [diff] [blame] | 1689 | "non-constant memref sizes not yet supported")); | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1690 | error = true; | 
|  | 1691 | return true; | 
|  | 1692 | } | 
|  | 1693 | it->second->getConstraints()->clearAndCopyFrom( | 
|  | 1694 | *region->getConstraints()); | 
|  | 1695 | } else { | 
|  | 1696 | // Union was computed and stored in 'it->second': copy to 'region'. | 
|  | 1697 | region->getConstraints()->clearAndCopyFrom( | 
|  | 1698 | *it->second->getConstraints()); | 
|  | 1699 | } | 
|  | 1700 | return true; | 
|  | 1701 | }; | 
|  | 1702 |  | 
|  | 1703 | bool existsInRead = updateRegion(readRegions); | 
|  | 1704 | if (error) | 
|  | 1705 | return; | 
|  | 1706 | bool existsInWrite = updateRegion(writeRegions); | 
|  | 1707 | if (error) | 
|  | 1708 | return; | 
|  | 1709 |  | 
|  | 1710 | // Finally add it to the region list. | 
|  | 1711 | if (region->isWrite() && !existsInWrite) { | 
|  | 1712 | writeRegions[region->memref] = std::move(region); | 
|  | 1713 | } else if (!region->isWrite() && !existsInRead) { | 
|  | 1714 | readRegions[region->memref] = std::move(region); | 
|  | 1715 | } | 
|  | 1716 | }); | 
|  | 1717 |  | 
|  | 1718 | if (error) { | 
|  | 1719 | begin->emitError( | 
|  | 1720 | "copy generation failed for one or more memref's in this block\n"); | 
|  | 1721 | return 0; | 
|  | 1722 | } | 
|  | 1723 |  | 
|  | 1724 | uint64_t totalCopyBuffersSizeInBytes = 0; | 
|  | 1725 | bool ret = true; | 
|  | 1726 | auto processRegions = | 
| River Riddle | e62a695 | 2019-12-23 14:45:01 -0800 | [diff] [blame] | 1727 | [&](const SmallMapVector<Value, std::unique_ptr<MemRefRegion>, 4> | 
| Uday Bondhugula | 4f32ae6 | 2019-09-14 13:23:18 -0700 | [diff] [blame] | 1728 | &regions) { | 
|  | 1729 | for (const auto &regionEntry : regions) { | 
|  | 1730 | // For each region, hoist copy in/out past all hoistable | 
|  | 1731 | // 'affine.for's. | 
|  | 1732 | Block::iterator copyInPlacementStart, copyOutPlacementStart; | 
|  | 1733 | Block *copyPlacementBlock; | 
|  | 1734 | findHighestBlockForPlacement( | 
|  | 1735 | *regionEntry.second, *block, begin, end, &copyPlacementBlock, | 
|  | 1736 | &copyInPlacementStart, &copyOutPlacementStart); | 
|  | 1737 |  | 
|  | 1738 | uint64_t sizeInBytes; | 
|  | 1739 | Block::iterator nBegin, nEnd; | 
|  | 1740 | LogicalResult iRet = generateCopy( | 
|  | 1741 | *regionEntry.second, block, begin, end, copyPlacementBlock, | 
|  | 1742 | copyInPlacementStart, copyOutPlacementStart, copyOptions, | 
|  | 1743 | fastBufferMap, copyNests, &sizeInBytes, &nBegin, &nEnd); | 
|  | 1744 | if (succeeded(iRet)) { | 
|  | 1745 | // begin/end could have been invalidated, and need update. | 
|  | 1746 | begin = nBegin; | 
|  | 1747 | end = nEnd; | 
|  | 1748 | totalCopyBuffersSizeInBytes += sizeInBytes; | 
|  | 1749 | } | 
|  | 1750 | ret = ret & succeeded(iRet); | 
|  | 1751 | } | 
|  | 1752 | }; | 
|  | 1753 | processRegions(readRegions); | 
|  | 1754 | processRegions(writeRegions); | 
|  | 1755 |  | 
|  | 1756 | if (!ret) { | 
|  | 1757 | begin->emitError( | 
|  | 1758 | "copy generation failed for one or more memref's in this block\n"); | 
|  | 1759 | return totalCopyBuffersSizeInBytes; | 
|  | 1760 | } | 
|  | 1761 |  | 
|  | 1762 | // If the range begins with an 'affine.for', a debug remark is emitted on it | 
|  |  | // below; for a general range of operations, a note will be emitted at the | 
|  |  | // caller. | 
|  | 1763 | AffineForOp forOp; | 
|  | 1764 | uint64_t sizeInKib = llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024); | 
|  | 1765 | if (llvm::DebugFlag && (forOp = dyn_cast<AffineForOp>(&*begin))) { | 
|  | 1766 | forOp.emitRemark() | 
|  | 1767 | << sizeInKib | 
|  | 1768 | << " KiB of copy buffers in fast memory space for this block\n"; | 
|  | 1769 | } | 
|  | 1770 |  | 
|  | 1771 | if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) { | 
|  | 1772 | StringRef str = "Total size of all copy buffers' for this block " | 
|  | 1773 | "exceeds fast memory capacity\n"; | 
|  | 1774 | block->getParentOp()->emitError(str); | 
|  | 1775 | } | 
|  | 1776 |  | 
|  | 1777 | return totalCopyBuffersSizeInBytes; | 
|  | 1778 | } |