| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 1 | //===- LoopInterchange.cpp - Loop interchange pass-------------------------===// | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | // This Pass handles loop interchange transform. | 
|  | 11 | // This pass interchanges loops to provide a more cache-friendly memory access | 
|  | 12 | // patterns. | 
|  | 13 | // | 
|  | 14 | //===----------------------------------------------------------------------===// | 
|  | 15 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 16 | #include "llvm/ADT/STLExtras.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/SmallVector.h" | 
| Florian Hahn | 6e00433 | 2018-04-05 10:39:23 +0000 | [diff] [blame] | 18 | #include "llvm/ADT/Statistic.h" | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 19 | #include "llvm/ADT/StringRef.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 20 | #include "llvm/Analysis/DependenceAnalysis.h" | 
|  | 21 | #include "llvm/Analysis/LoopInfo.h" | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 22 | #include "llvm/Analysis/LoopPass.h" | 
| Adam Nemet | 0965da2 | 2017-10-09 23:19:02 +0000 | [diff] [blame] | 23 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 24 | #include "llvm/Analysis/ScalarEvolution.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 25 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 26 | #include "llvm/IR/BasicBlock.h" | 
|  | 27 | #include "llvm/IR/Constants.h" | 
|  | 28 | #include "llvm/IR/DiagnosticInfo.h" | 
| Benjamin Kramer | 799003b | 2015-03-23 19:32:43 +0000 | [diff] [blame] | 29 | #include "llvm/IR/Dominators.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 30 | #include "llvm/IR/Function.h" | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 31 | #include "llvm/IR/InstrTypes.h" | 
|  | 32 | #include "llvm/IR/Instruction.h" | 
|  | 33 | #include "llvm/IR/Instructions.h" | 
|  | 34 | #include "llvm/IR/Type.h" | 
|  | 35 | #include "llvm/IR/User.h" | 
|  | 36 | #include "llvm/IR/Value.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 37 | #include "llvm/Pass.h" | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 38 | #include "llvm/Support/Casting.h" | 
|  | 39 | #include "llvm/Support/CommandLine.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 40 | #include "llvm/Support/Debug.h" | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 41 | #include "llvm/Support/ErrorHandling.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 42 | #include "llvm/Support/raw_ostream.h" | 
| Benjamin Kramer | 799003b | 2015-03-23 19:32:43 +0000 | [diff] [blame] | 43 | #include "llvm/Transforms/Scalar.h" | 
| David Blaikie | a373d18 | 2018-03-28 17:44:36 +0000 | [diff] [blame] | 44 | #include "llvm/Transforms/Utils.h" | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 45 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | 
| Benjamin Kramer | 799003b | 2015-03-23 19:32:43 +0000 | [diff] [blame] | 46 | #include "llvm/Transforms/Utils/LoopUtils.h" | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 47 | #include <cassert> | 
|  | 48 | #include <utility> | 
|  | 49 | #include <vector> | 
| Davide Italiano | 9d8f6f8 | 2017-01-29 01:55:24 +0000 | [diff] [blame] | 50 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 51 | using namespace llvm; | 
|  | 52 |  | 
|  | 53 | #define DEBUG_TYPE "loop-interchange" | 
|  | 54 |  | 
| Florian Hahn | 6e00433 | 2018-04-05 10:39:23 +0000 | [diff] [blame] | 55 | STATISTIC(LoopsInterchanged, "Number of loops interchanged"); | 
|  | 56 |  | 
| Chad Rosier | 7243189 | 2016-09-14 17:07:13 +0000 | [diff] [blame] | 57 | static cl::opt<int> LoopInterchangeCostThreshold( | 
|  | 58 | "loop-interchange-threshold", cl::init(0), cl::Hidden, | 
|  | 59 | cl::desc("Interchange if you gain more than this number")); | 
|  | 60 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 61 | namespace { | 
|  | 62 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 63 | using LoopVector = SmallVector<Loop *, 8>; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 64 |  | 
|  | 65 | // TODO: Check if we can use a sparse matrix here. | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 66 | using CharMatrix = std::vector<std::vector<char>>; | 
|  | 67 |  | 
|  | 68 | } // end anonymous namespace | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 69 |  | 
|  | 70 | // Maximum number of dependencies that can be handled in the dependency matrix. | 
|  | 71 | static const unsigned MaxMemInstrCount = 100; | 
|  | 72 |  | 
|  | 73 | // Maximum loop depth supported. | 
|  | 74 | static const unsigned MaxLoopNestDepth = 10; | 
|  | 75 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 76 | #ifdef DUMP_DEP_MATRICIES | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 77 | static void printDepMatrix(CharMatrix &DepMatrix) { | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 78 | for (auto &Row : DepMatrix) { | 
|  | 79 | for (auto D : Row) | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 80 | LLVM_DEBUG(dbgs() << D << " "); | 
|  | 81 | LLVM_DEBUG(dbgs() << "\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 82 | } | 
|  | 83 | } | 
|  | 84 | #endif | 
|  | 85 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 86 | static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, | 
| Chandler Carruth | 49c2219 | 2016-05-12 22:19:39 +0000 | [diff] [blame] | 87 | Loop *L, DependenceInfo *DI) { | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 88 | using ValueVector = SmallVector<Value *, 16>; | 
|  | 89 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 90 | ValueVector MemInstr; | 
|  | 91 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 92 | // For each block. | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 93 | for (BasicBlock *BB : L->blocks()) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 94 | // Scan the BB and collect legal loads and stores. | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 95 | for (Instruction &I : *BB) { | 
| Chad Rosier | 09c1109 | 2016-09-13 12:56:04 +0000 | [diff] [blame] | 96 | if (!isa<Instruction>(I)) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 97 | return false; | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 98 | if (auto *Ld = dyn_cast<LoadInst>(&I)) { | 
| Chad Rosier | 09c1109 | 2016-09-13 12:56:04 +0000 | [diff] [blame] | 99 | if (!Ld->isSimple()) | 
|  | 100 | return false; | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 101 | MemInstr.push_back(&I); | 
|  | 102 | } else if (auto *St = dyn_cast<StoreInst>(&I)) { | 
| Chad Rosier | 09c1109 | 2016-09-13 12:56:04 +0000 | [diff] [blame] | 103 | if (!St->isSimple()) | 
|  | 104 | return false; | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 105 | MemInstr.push_back(&I); | 
| Chad Rosier | 09c1109 | 2016-09-13 12:56:04 +0000 | [diff] [blame] | 106 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 107 | } | 
|  | 108 | } | 
|  | 109 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 110 | LLVM_DEBUG(dbgs() << "Found " << MemInstr.size() | 
|  | 111 | << " Loads and Stores to analyze\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 112 |  | 
|  | 113 | ValueVector::iterator I, IE, J, JE; | 
|  | 114 |  | 
|  | 115 | for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { | 
|  | 116 | for (J = I, JE = MemInstr.end(); J != JE; ++J) { | 
|  | 117 | std::vector<char> Dep; | 
| Chad Rosier | 09c1109 | 2016-09-13 12:56:04 +0000 | [diff] [blame] | 118 | Instruction *Src = cast<Instruction>(*I); | 
|  | 119 | Instruction *Dst = cast<Instruction>(*J); | 
| Chad Rosier | 90bcb91 | 2016-09-07 16:07:17 +0000 | [diff] [blame] | 120 | if (Src == Dst) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 121 | continue; | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 122 | // Ignore Input dependencies. | 
| Chad Rosier | 90bcb91 | 2016-09-07 16:07:17 +0000 | [diff] [blame] | 123 | if (isa<LoadInst>(Src) && isa<LoadInst>(Dst)) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 124 | continue; | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 125 | // Track Output, Flow, and Anti dependencies. | 
| Chad Rosier | 90bcb91 | 2016-09-07 16:07:17 +0000 | [diff] [blame] | 126 | if (auto D = DI->depends(Src, Dst, true)) { | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 127 | assert(D->isOrdered() && "Expected an output, flow or anti dep."); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 128 | LLVM_DEBUG(StringRef DepType = | 
|  | 129 | D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; | 
|  | 130 | dbgs() << "Found " << DepType | 
|  | 131 | << " dependency between Src and Dst\n" | 
|  | 132 | << " Src:" << *Src << "\n Dst:" << *Dst << '\n'); | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 133 | unsigned Levels = D->getLevels(); | 
|  | 134 | char Direction; | 
|  | 135 | for (unsigned II = 1; II <= Levels; ++II) { | 
|  | 136 | const SCEV *Distance = D->getDistance(II); | 
|  | 137 | const SCEVConstant *SCEVConst = | 
|  | 138 | dyn_cast_or_null<SCEVConstant>(Distance); | 
|  | 139 | if (SCEVConst) { | 
|  | 140 | const ConstantInt *CI = SCEVConst->getValue(); | 
|  | 141 | if (CI->isNegative()) | 
|  | 142 | Direction = '<'; | 
|  | 143 | else if (CI->isZero()) | 
|  | 144 | Direction = '='; | 
|  | 145 | else | 
|  | 146 | Direction = '>'; | 
|  | 147 | Dep.push_back(Direction); | 
|  | 148 | } else if (D->isScalar(II)) { | 
|  | 149 | Direction = 'S'; | 
|  | 150 | Dep.push_back(Direction); | 
|  | 151 | } else { | 
|  | 152 | unsigned Dir = D->getDirection(II); | 
|  | 153 | if (Dir == Dependence::DVEntry::LT || | 
|  | 154 | Dir == Dependence::DVEntry::LE) | 
|  | 155 | Direction = '<'; | 
|  | 156 | else if (Dir == Dependence::DVEntry::GT || | 
|  | 157 | Dir == Dependence::DVEntry::GE) | 
|  | 158 | Direction = '>'; | 
|  | 159 | else if (Dir == Dependence::DVEntry::EQ) | 
|  | 160 | Direction = '='; | 
|  | 161 | else | 
|  | 162 | Direction = '*'; | 
|  | 163 | Dep.push_back(Direction); | 
|  | 164 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 165 | } | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 166 | while (Dep.size() != Level) { | 
|  | 167 | Dep.push_back('I'); | 
|  | 168 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 169 |  | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 170 | DepMatrix.push_back(Dep); | 
|  | 171 | if (DepMatrix.size() > MaxMemInstrCount) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 172 | LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount | 
|  | 173 | << " dependencies inside loop\n"); | 
| Chad Rosier | 00eb8db | 2016-09-21 19:16:47 +0000 | [diff] [blame] | 174 | return false; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 175 | } | 
|  | 176 | } | 
|  | 177 | } | 
|  | 178 | } | 
|  | 179 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 180 | return true; | 
|  | 181 | } | 
|  | 182 |  | 
|  | 183 | // A loop is moved from index 'from' to an index 'to'. Update the Dependence | 
|  | 184 | // matrix by exchanging the two columns. | 
| Chad Rosier | d18ea06 | 2016-09-13 13:00:29 +0000 | [diff] [blame] | 185 | static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, | 
|  | 186 | unsigned ToIndx) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 187 | unsigned numRows = DepMatrix.size(); | 
|  | 188 | for (unsigned i = 0; i < numRows; ++i) { | 
|  | 189 | char TmpVal = DepMatrix[i][ToIndx]; | 
|  | 190 | DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; | 
|  | 191 | DepMatrix[i][FromIndx] = TmpVal; | 
|  | 192 | } | 
|  | 193 | } | 
|  | 194 |  | 
|  | 195 | // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is | 
|  | 196 | // '>' | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 197 | static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row, | 
|  | 198 | unsigned Column) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 199 | for (unsigned i = 0; i <= Column; ++i) { | 
|  | 200 | if (DepMatrix[Row][i] == '<') | 
|  | 201 | return false; | 
|  | 202 | if (DepMatrix[Row][i] == '>') | 
|  | 203 | return true; | 
|  | 204 | } | 
|  | 205 | // All dependencies were '=','S' or 'I' | 
|  | 206 | return false; | 
|  | 207 | } | 
|  | 208 |  | 
|  | 209 | // Checks if no dependence exist in the dependency matrix in Row before Column. | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 210 | static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, | 
|  | 211 | unsigned Column) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 212 | for (unsigned i = 0; i < Column; ++i) { | 
| Chandler Carruth | fca1ff0 | 2016-11-03 16:39:25 +0000 | [diff] [blame] | 213 | if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' && | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 214 | DepMatrix[Row][i] != 'I') | 
|  | 215 | return false; | 
|  | 216 | } | 
|  | 217 | return true; | 
|  | 218 | } | 
|  | 219 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 220 | static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, | 
|  | 221 | unsigned OuterLoopId, char InnerDep, | 
|  | 222 | char OuterDep) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 223 | if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) | 
|  | 224 | return false; | 
|  | 225 |  | 
|  | 226 | if (InnerDep == OuterDep) | 
|  | 227 | return true; | 
|  | 228 |  | 
|  | 229 | // It is legal to interchange if and only if after interchange no row has a | 
|  | 230 | // '>' direction as the leftmost non-'='. | 
|  | 231 |  | 
|  | 232 | if (InnerDep == '=' || InnerDep == 'S' || InnerDep == 'I') | 
|  | 233 | return true; | 
|  | 234 |  | 
|  | 235 | if (InnerDep == '<') | 
|  | 236 | return true; | 
|  | 237 |  | 
|  | 238 | if (InnerDep == '>') { | 
|  | 239 | // If OuterLoopId represents outermost loop then interchanging will make the | 
|  | 240 | // 1st dependency as '>' | 
|  | 241 | if (OuterLoopId == 0) | 
|  | 242 | return false; | 
|  | 243 |  | 
|  | 244 | // If all dependencies before OuterloopId are '=','S'or 'I'. Then | 
|  | 245 | // interchanging will result in this row having an outermost non '=' | 
|  | 246 | // dependency of '>' | 
|  | 247 | if (!containsNoDependence(DepMatrix, Row, OuterLoopId)) | 
|  | 248 | return true; | 
|  | 249 | } | 
|  | 250 |  | 
|  | 251 | return false; | 
|  | 252 | } | 
|  | 253 |  | 
|  | 254 | // Checks if it is legal to interchange 2 loops. | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 255 | // [Theorem] A permutation of the loops in a perfect nest is legal if and only | 
| Chad Rosier | 61683a2 | 2016-09-13 13:08:53 +0000 | [diff] [blame] | 256 | // if the direction matrix, after the same permutation is applied to its | 
|  | 257 | // columns, has no ">" direction as the leftmost non-"=" direction in any row. | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 258 | static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, | 
|  | 259 | unsigned InnerLoopId, | 
|  | 260 | unsigned OuterLoopId) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 261 | unsigned NumRows = DepMatrix.size(); | 
|  | 262 | // For each row check if it is valid to interchange. | 
|  | 263 | for (unsigned Row = 0; Row < NumRows; ++Row) { | 
|  | 264 | char InnerDep = DepMatrix[Row][InnerLoopId]; | 
|  | 265 | char OuterDep = DepMatrix[Row][OuterLoopId]; | 
|  | 266 | if (InnerDep == '*' || OuterDep == '*') | 
|  | 267 | return false; | 
| Chad Rosier | 61683a2 | 2016-09-13 13:08:53 +0000 | [diff] [blame] | 268 | if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep)) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 269 | return false; | 
|  | 270 | } | 
|  | 271 | return true; | 
|  | 272 | } | 
|  | 273 |  | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 274 | static LoopVector populateWorklist(Loop &L) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 275 | LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: " | 
|  | 276 | << L.getHeader()->getParent()->getName() << " Loop: %" | 
|  | 277 | << L.getHeader()->getName() << '\n'); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 278 | LoopVector LoopList; | 
|  | 279 | Loop *CurrentLoop = &L; | 
| Benjamin Kramer | e448b5b | 2015-07-13 17:21:14 +0000 | [diff] [blame] | 280 | const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); | 
|  | 281 | while (!Vec->empty()) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 282 | // The current loop has multiple subloops in it hence it is not tightly | 
|  | 283 | // nested. | 
|  | 284 | // Discard all loops above it added into Worklist. | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 285 | if (Vec->size() != 1) | 
|  | 286 | return {}; | 
|  | 287 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 288 | LoopList.push_back(CurrentLoop); | 
| Benjamin Kramer | e448b5b | 2015-07-13 17:21:14 +0000 | [diff] [blame] | 289 | CurrentLoop = Vec->front(); | 
|  | 290 | Vec = &CurrentLoop->getSubLoops(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 291 | } | 
|  | 292 | LoopList.push_back(CurrentLoop); | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 293 | return LoopList; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 294 | } | 
|  | 295 |  | 
|  | 296 | static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { | 
|  | 297 | PHINode *InnerIndexVar = L->getCanonicalInductionVariable(); | 
|  | 298 | if (InnerIndexVar) | 
|  | 299 | return InnerIndexVar; | 
|  | 300 | if (L->getLoopLatch() == nullptr || L->getLoopPredecessor() == nullptr) | 
|  | 301 | return nullptr; | 
|  | 302 | for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { | 
|  | 303 | PHINode *PhiVar = cast<PHINode>(I); | 
|  | 304 | Type *PhiTy = PhiVar->getType(); | 
|  | 305 | if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && | 
|  | 306 | !PhiTy->isPointerTy()) | 
|  | 307 | return nullptr; | 
|  | 308 | const SCEVAddRecExpr *AddRec = | 
|  | 309 | dyn_cast<SCEVAddRecExpr>(SE->getSCEV(PhiVar)); | 
|  | 310 | if (!AddRec || !AddRec->isAffine()) | 
|  | 311 | continue; | 
|  | 312 | const SCEV *Step = AddRec->getStepRecurrence(*SE); | 
| Chad Rosier | f7c76f9 | 2016-09-21 13:28:41 +0000 | [diff] [blame] | 313 | if (!isa<SCEVConstant>(Step)) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 314 | continue; | 
|  | 315 | // Found the induction variable. | 
|  | 316 | // FIXME: Handle loops with more than one induction variable. Note that, | 
|  | 317 | // currently, legality makes sure we have only one induction variable. | 
|  | 318 | return PhiVar; | 
|  | 319 | } | 
|  | 320 | return nullptr; | 
|  | 321 | } | 
|  | 322 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 323 | namespace { | 
|  | 324 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 325 | /// LoopInterchangeLegality checks if it is legal to interchange the loop. | 
|  | 326 | class LoopInterchangeLegality { | 
|  | 327 | public: | 
|  | 328 | LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 329 | OptimizationRemarkEmitter *ORE) | 
| Florian Hahn | c51d088 | 2018-09-06 10:41:01 +0000 | [diff] [blame] | 330 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 331 |  | 
|  | 332 | /// Check if the loops can be interchanged. | 
|  | 333 | bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, | 
|  | 334 | CharMatrix &DepMatrix); | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 335 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 336 | /// Check if the loop structure is understood. We do not handle triangular | 
|  | 337 | /// loops for now. | 
|  | 338 | bool isLoopStructureUnderstood(PHINode *InnerInductionVar); | 
|  | 339 |  | 
|  | 340 | bool currentLimitations(); | 
|  | 341 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 342 | bool hasInnerLoopReduction() { return InnerLoopHasReduction; } | 
|  | 343 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 344 | private: | 
|  | 345 | bool tightlyNested(Loop *Outer, Loop *Inner); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 346 | bool containsUnsafeInstructionsInHeader(BasicBlock *BB); | 
|  | 347 | bool areAllUsesReductions(Instruction *Ins, Loop *L); | 
|  | 348 | bool containsUnsafeInstructionsInLatch(BasicBlock *BB); | 
|  | 349 | bool findInductionAndReductions(Loop *L, | 
|  | 350 | SmallVector<PHINode *, 8> &Inductions, | 
|  | 351 | SmallVector<PHINode *, 8> &Reductions); | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 352 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 353 | Loop *OuterLoop; | 
|  | 354 | Loop *InnerLoop; | 
|  | 355 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 356 | ScalarEvolution *SE; | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 357 |  | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 358 | /// Interface to emit optimization remarks. | 
|  | 359 | OptimizationRemarkEmitter *ORE; | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 360 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 361 | bool InnerLoopHasReduction = false; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 362 | }; | 
|  | 363 |  | 
|  | 364 | /// LoopInterchangeProfitability checks if it is profitable to interchange the | 
|  | 365 | /// loop. | 
|  | 366 | class LoopInterchangeProfitability { | 
|  | 367 | public: | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 368 | LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE, | 
|  | 369 | OptimizationRemarkEmitter *ORE) | 
|  | 370 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 371 |  | 
| Vikram TV | 74b4111 | 2015-12-09 05:16:24 +0000 | [diff] [blame] | 372 | /// Check if the loop interchange is profitable. | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 373 | bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, | 
|  | 374 | CharMatrix &DepMatrix); | 
|  | 375 |  | 
|  | 376 | private: | 
|  | 377 | int getInstrOrderCost(); | 
|  | 378 |  | 
|  | 379 | Loop *OuterLoop; | 
|  | 380 | Loop *InnerLoop; | 
|  | 381 |  | 
|  | 382 | /// Scev analysis. | 
|  | 383 | ScalarEvolution *SE; | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 384 |  | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 385 | /// Interface to emit optimization remarks. | 
|  | 386 | OptimizationRemarkEmitter *ORE; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 387 | }; | 
|  | 388 |  | 
| Vikram TV | 74b4111 | 2015-12-09 05:16:24 +0000 | [diff] [blame] | 389 | /// LoopInterchangeTransform interchanges the loop. | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 390 | class LoopInterchangeTransform { | 
|  | 391 | public: | 
|  | 392 | LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, | 
|  | 393 | LoopInfo *LI, DominatorTree *DT, | 
| Justin Bogner | 843fb20 | 2015-12-15 19:40:57 +0000 | [diff] [blame] | 394 | BasicBlock *LoopNestExit, | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 395 | bool InnerLoopContainsReductions) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 396 | : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 397 | LoopExit(LoopNestExit), | 
|  | 398 | InnerLoopHasReduction(InnerLoopContainsReductions) {} | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 399 |  | 
|  | 400 | /// Interchange OuterLoop and InnerLoop. | 
|  | 401 | bool transform(); | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 402 | void restructureLoops(Loop *NewInner, Loop *NewOuter, | 
|  | 403 | BasicBlock *OrigInnerPreHeader, | 
|  | 404 | BasicBlock *OrigOuterPreHeader); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 405 | void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 406 |  | 
|  | 407 | private: | 
|  | 408 | void splitInnerLoopLatch(Instruction *); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 409 | void splitInnerLoopHeader(); | 
|  | 410 | bool adjustLoopLinks(); | 
|  | 411 | void adjustLoopPreheaders(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 412 | bool adjustLoopBranches(); | 
|  | 413 |  | 
|  | 414 | Loop *OuterLoop; | 
|  | 415 | Loop *InnerLoop; | 
|  | 416 |  | 
|  | 417 | /// Scev analysis. | 
|  | 418 | ScalarEvolution *SE; | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 419 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 420 | LoopInfo *LI; | 
|  | 421 | DominatorTree *DT; | 
|  | 422 | BasicBlock *LoopExit; | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 423 | bool InnerLoopHasReduction; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 424 | }; | 
|  | 425 |  | 
| Vikram TV | 74b4111 | 2015-12-09 05:16:24 +0000 | [diff] [blame] | 426 | // Main LoopInterchange Pass. | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 427 | struct LoopInterchange : public LoopPass { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 428 | static char ID; | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 429 | ScalarEvolution *SE = nullptr; | 
|  | 430 | LoopInfo *LI = nullptr; | 
|  | 431 | DependenceInfo *DI = nullptr; | 
|  | 432 | DominatorTree *DT = nullptr; | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 433 |  | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 434 | /// Interface to emit optimization remarks. | 
|  | 435 | OptimizationRemarkEmitter *ORE; | 
|  | 436 |  | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 437 | LoopInterchange() : LoopPass(ID) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 438 | initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); | 
|  | 439 | } | 
|  | 440 |  | 
|  | 441 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
| Chandler Carruth | 49c2219 | 2016-05-12 22:19:39 +0000 | [diff] [blame] | 442 | AU.addRequired<DependenceAnalysisWrapperPass>(); | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 443 | AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 444 |  | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 445 | getLoopAnalysisUsage(AU); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 446 | } | 
|  | 447 |  | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 448 | bool runOnLoop(Loop *L, LPPassManager &LPM) override { | 
|  | 449 | if (skipLoop(L) || L->getParentLoop()) | 
| Florian Hahn | 8d72ecc | 2018-09-28 10:20:07 +0000 | [diff] [blame] | 450 | return false; | 
| Andrew Kaylor | 50271f7 | 2016-05-03 22:32:30 +0000 | [diff] [blame] | 451 |  | 
| Chandler Carruth | 2f1fd16 | 2015-08-17 02:08:17 +0000 | [diff] [blame] | 452 | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 453 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | 
| Chandler Carruth | 49c2219 | 2016-05-12 22:19:39 +0000 | [diff] [blame] | 454 | DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI(); | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 455 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 456 | ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); | 
| Justin Bogner | 843fb20 | 2015-12-15 19:40:57 +0000 | [diff] [blame] | 457 |  | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 458 | return processLoopList(populateWorklist(*L)); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 459 | } | 
|  | 460 |  | 
|  | 461 | bool isComputableLoopNest(LoopVector LoopList) { | 
| Benjamin Kramer | 135f735 | 2016-06-26 12:28:59 +0000 | [diff] [blame] | 462 | for (Loop *L : LoopList) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 463 | const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L); | 
|  | 464 | if (ExitCountOuter == SE->getCouldNotCompute()) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 465 | LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 466 | return false; | 
|  | 467 | } | 
|  | 468 | if (L->getNumBackEdges() != 1) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 469 | LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 470 | return false; | 
|  | 471 | } | 
|  | 472 | if (!L->getExitingBlock()) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 473 | LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 474 | return false; | 
|  | 475 | } | 
|  | 476 | } | 
|  | 477 | return true; | 
|  | 478 | } | 
|  | 479 |  | 
| Benjamin Kramer | c321e53 | 2016-06-08 19:09:22 +0000 | [diff] [blame] | 480 | unsigned selectLoopForInterchange(const LoopVector &LoopList) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 481 | // TODO: Add a better heuristic to select the loop to be interchanged based | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 482 | // on the dependence matrix. Currently we select the innermost loop. | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 483 | return LoopList.size() - 1; | 
|  | 484 | } | 
|  | 485 |  | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 486 | bool processLoopList(LoopVector LoopList) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 487 | bool Changed = false; | 
| Chad Rosier | 7ea0d39 | 2016-09-13 13:30:30 +0000 | [diff] [blame] | 488 | unsigned LoopNestDepth = LoopList.size(); | 
|  | 489 | if (LoopNestDepth < 2) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 490 | LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 491 | return false; | 
|  | 492 | } | 
| Chad Rosier | 7ea0d39 | 2016-09-13 13:30:30 +0000 | [diff] [blame] | 493 | if (LoopNestDepth > MaxLoopNestDepth) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 494 | LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than " | 
|  | 495 | << MaxLoopNestDepth << "\n"); | 
| Chad Rosier | 7ea0d39 | 2016-09-13 13:30:30 +0000 | [diff] [blame] | 496 | return false; | 
|  | 497 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 498 | if (!isComputableLoopNest(LoopList)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 499 | LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 500 | return false; | 
|  | 501 | } | 
| Chad Rosier | 7ea0d39 | 2016-09-13 13:30:30 +0000 | [diff] [blame] | 502 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 503 | LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth | 
|  | 504 | << "\n"); | 
| Chad Rosier | 7ea0d39 | 2016-09-13 13:30:30 +0000 | [diff] [blame] | 505 |  | 
|  | 506 | CharMatrix DependencyMatrix; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 507 | Loop *OuterMostLoop = *(LoopList.begin()); | 
| Chad Rosier | 7ea0d39 | 2016-09-13 13:30:30 +0000 | [diff] [blame] | 508 | if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth, | 
| Chandler Carruth | 49c2219 | 2016-05-12 22:19:39 +0000 | [diff] [blame] | 509 | OuterMostLoop, DI)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 510 | LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 511 | return false; | 
|  | 512 | } | 
|  | 513 | #ifdef DUMP_DEP_MATRICIES | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 514 | LLVM_DEBUG(dbgs() << "Dependence before interchange\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 515 | printDepMatrix(DependencyMatrix); | 
|  | 516 | #endif | 
|  | 517 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 518 | // Get the Outermost loop exit. | 
| Florian Hahn | 1da30c6 | 2018-04-25 09:35:54 +0000 | [diff] [blame] | 519 | BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock(); | 
|  | 520 | if (!LoopNestExit) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 521 | LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block"); | 
| Florian Hahn | 1da30c6 | 2018-04-25 09:35:54 +0000 | [diff] [blame] | 522 | return false; | 
|  | 523 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 524 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 525 | unsigned SelecLoopId = selectLoopForInterchange(LoopList); | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 526 | // Move the selected loop outwards to the best possible position. | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 527 | for (unsigned i = SelecLoopId; i > 0; i--) { | 
|  | 528 | bool Interchanged = | 
|  | 529 | processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); | 
|  | 530 | if (!Interchanged) | 
|  | 531 | return Changed; | 
|  | 532 | // Loops interchanged reflect the same in LoopList | 
| Benjamin Kramer | 7944292 | 2015-03-06 18:59:14 +0000 | [diff] [blame] | 533 | std::swap(LoopList[i - 1], LoopList[i]); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 534 |  | 
|  | 535 | // Update the DependencyMatrix | 
| Chad Rosier | d18ea06 | 2016-09-13 13:00:29 +0000 | [diff] [blame] | 536 | interChangeDependencies(DependencyMatrix, i, i - 1); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 537 | #ifdef DUMP_DEP_MATRICIES | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 538 | LLVM_DEBUG(dbgs() << "Dependence after interchange\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 539 | printDepMatrix(DependencyMatrix); | 
|  | 540 | #endif | 
|  | 541 | Changed |= Interchanged; | 
|  | 542 | } | 
|  | 543 | return Changed; | 
|  | 544 | } | 
|  | 545 |  | 
|  | 546 | bool processLoop(LoopVector LoopList, unsigned InnerLoopId, | 
|  | 547 | unsigned OuterLoopId, BasicBlock *LoopNestExit, | 
|  | 548 | std::vector<std::vector<char>> &DependencyMatrix) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 549 | LLVM_DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId | 
|  | 550 | << " and OuterLoopId = " << OuterLoopId << "\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 551 | Loop *InnerLoop = LoopList[InnerLoopId]; | 
|  | 552 | Loop *OuterLoop = LoopList[OuterLoopId]; | 
|  | 553 |  | 
| Florian Hahn | c51d088 | 2018-09-06 10:41:01 +0000 | [diff] [blame] | 554 | LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 555 | if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 556 | LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 557 | return false; | 
|  | 558 | } | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 559 | LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n"); | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 560 | LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 561 | if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 562 | LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 563 | return false; | 
|  | 564 | } | 
|  | 565 |  | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 566 | ORE->emit([&]() { | 
|  | 567 | return OptimizationRemark(DEBUG_TYPE, "Interchanged", | 
|  | 568 | InnerLoop->getStartLoc(), | 
|  | 569 | InnerLoop->getHeader()) | 
|  | 570 | << "Loop interchanged with enclosing loop."; | 
|  | 571 | }); | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 572 |  | 
| Justin Bogner | 843fb20 | 2015-12-15 19:40:57 +0000 | [diff] [blame] | 573 | LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 574 | LoopNestExit, LIL.hasInnerLoopReduction()); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 575 | LIT.transform(); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 576 | LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); | 
| Florian Hahn | 6e00433 | 2018-04-05 10:39:23 +0000 | [diff] [blame] | 577 | LoopsInterchanged++; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 578 | return true; | 
|  | 579 | } | 
|  | 580 | }; | 
|  | 581 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 582 | } // end anonymous namespace | 
|  | 583 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 584 | bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 585 | return llvm::none_of(Ins->users(), [=](User *U) -> bool { | 
| David Majnemer | 0a16c22 | 2016-08-11 21:15:00 +0000 | [diff] [blame] | 586 | auto *UserIns = dyn_cast<PHINode>(U); | 
| Tyler Nowicki | 0a91310 | 2015-06-16 18:07:34 +0000 | [diff] [blame] | 587 | RecurrenceDescriptor RD; | 
|  | 588 | return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 589 | }); | 
|  | 590 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 591 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 592 | bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( | 
|  | 593 | BasicBlock *BB) { | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 594 | for (Instruction &I : *BB) { | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 595 | // Load corresponding to reduction PHI's are safe while concluding if | 
|  | 596 | // tightly nested. | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 597 | if (LoadInst *L = dyn_cast<LoadInst>(&I)) { | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 598 | if (!areAllUsesReductions(L, InnerLoop)) | 
|  | 599 | return true; | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 600 | } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 601 | return true; | 
|  | 602 | } | 
|  | 603 | return false; | 
|  | 604 | } | 
|  | 605 |  | 
|  | 606 | bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( | 
|  | 607 | BasicBlock *BB) { | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 608 | for (Instruction &I : *BB) { | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 609 | // Stores corresponding to reductions are safe while concluding if tightly | 
|  | 610 | // nested. | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 611 | if (StoreInst *L = dyn_cast<StoreInst>(&I)) { | 
| Chad Rosier | f7c76f9 | 2016-09-21 13:28:41 +0000 | [diff] [blame] | 612 | if (!isa<PHINode>(L->getOperand(0))) | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 613 | return true; | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 614 | } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 615 | return true; | 
|  | 616 | } | 
|  | 617 | return false; | 
|  | 618 | } | 
|  | 619 |  | 
|  | 620 | bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { | 
|  | 621 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); | 
|  | 622 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | 
|  | 623 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); | 
|  | 624 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 625 | LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 626 |  | 
|  | 627 | // A perfectly nested loop will not have any branch in between the outer and | 
|  | 628 | // inner block i.e. outer header will branch to either inner preheader and | 
|  | 629 | // outerloop latch. | 
| Chad Rosier | f7c76f9 | 2016-09-21 13:28:41 +0000 | [diff] [blame] | 630 | BranchInst *OuterLoopHeaderBI = | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 631 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); | 
| Chad Rosier | f7c76f9 | 2016-09-21 13:28:41 +0000 | [diff] [blame] | 632 | if (!OuterLoopHeaderBI) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 633 | return false; | 
| Chad Rosier | f7c76f9 | 2016-09-21 13:28:41 +0000 | [diff] [blame] | 634 |  | 
| Chandler Carruth | 96fc1de | 2018-08-26 08:41:15 +0000 | [diff] [blame] | 635 | for (BasicBlock *Succ : successors(OuterLoopHeaderBI)) | 
| Florian Hahn | 236f6fe | 2018-09-06 09:57:27 +0000 | [diff] [blame] | 636 | if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() && | 
|  | 637 | Succ != OuterLoopLatch) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 638 | return false; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 639 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 640 | LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 641 | // We do not have any basic block in between now make sure the outer header | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 642 | // and outer loop latch doesn't contain any unsafe instructions. | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 643 | if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || | 
|  | 644 | containsUnsafeInstructionsInLatch(OuterLoopLatch)) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 645 | return false; | 
|  | 646 |  | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 647 | LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 648 | // We have a perfect loop nest. | 
|  | 649 | return true; | 
|  | 650 | } | 
|  | 651 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 652 | bool LoopInterchangeLegality::isLoopStructureUnderstood( | 
|  | 653 | PHINode *InnerInduction) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 654 | unsigned Num = InnerInduction->getNumOperands(); | 
|  | 655 | BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); | 
|  | 656 | for (unsigned i = 0; i < Num; ++i) { | 
|  | 657 | Value *Val = InnerInduction->getOperand(i); | 
|  | 658 | if (isa<Constant>(Val)) | 
|  | 659 | continue; | 
|  | 660 | Instruction *I = dyn_cast<Instruction>(Val); | 
|  | 661 | if (!I) | 
|  | 662 | return false; | 
|  | 663 | // TODO: Handle triangular loops. | 
|  | 664 | // e.g. for(int i=0;i<N;i++) | 
|  | 665 | //        for(int j=i;j<N;j++) | 
|  | 666 | unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i); | 
|  | 667 | if (InnerInduction->getIncomingBlock(IncomBlockIndx) == | 
|  | 668 | InnerLoopPreheader && | 
|  | 669 | !OuterLoop->isLoopInvariant(I)) { | 
|  | 670 | return false; | 
|  | 671 | } | 
|  | 672 | } | 
|  | 673 | return true; | 
|  | 674 | } | 
|  | 675 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 676 | bool LoopInterchangeLegality::findInductionAndReductions( | 
|  | 677 | Loop *L, SmallVector<PHINode *, 8> &Inductions, | 
|  | 678 | SmallVector<PHINode *, 8> &Reductions) { | 
|  | 679 | if (!L->getLoopLatch() || !L->getLoopPredecessor()) | 
|  | 680 | return false; | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 681 | for (PHINode &PHI : L->getHeader()->phis()) { | 
| Tyler Nowicki | 0a91310 | 2015-06-16 18:07:34 +0000 | [diff] [blame] | 682 | RecurrenceDescriptor RD; | 
| James Molloy | 1bbf15c | 2015-08-27 09:53:00 +0000 | [diff] [blame] | 683 | InductionDescriptor ID; | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 684 | if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) | 
|  | 685 | Inductions.push_back(&PHI); | 
|  | 686 | else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD)) | 
|  | 687 | Reductions.push_back(&PHI); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 688 | else { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 689 | LLVM_DEBUG( | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 690 | dbgs() << "Failed to recognize PHI as an induction or reduction.\n"); | 
|  | 691 | return false; | 
|  | 692 | } | 
|  | 693 | } | 
|  | 694 | return true; | 
|  | 695 | } | 
|  | 696 |  | 
|  | 697 | static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) { | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 698 | for (PHINode &PHI : Block->phis()) { | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 699 | // Reduction lcssa phi will have only 1 incoming block that from loop latch. | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 700 | if (PHI.getNumIncomingValues() > 1) | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 701 | return false; | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 702 | Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0)); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 703 | if (!Ins) | 
|  | 704 | return false; | 
|  | 705 | // Incoming value for lcssa phi's in outer loop exit can only be inner loop | 
|  | 706 | // exits lcssa phi else it would not be tightly nested. | 
|  | 707 | if (!isa<PHINode>(Ins) && isOuterLoopExitBlock) | 
|  | 708 | return false; | 
|  | 709 | } | 
|  | 710 | return true; | 
|  | 711 | } | 
|  | 712 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 713 | // This function indicates the current limitations in the transform as a result | 
|  | 714 | // of which we do not proceed. | 
|  | 715 | bool LoopInterchangeLegality::currentLimitations() { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 716 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 717 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | 
| Florian Hahn | 1da30c6 | 2018-04-25 09:35:54 +0000 | [diff] [blame] | 718 |  | 
|  | 719 | // transform currently expects the loop latches to also be the exiting | 
|  | 720 | // blocks. | 
|  | 721 | if (InnerLoop->getExitingBlock() != InnerLoopLatch || | 
|  | 722 | OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() || | 
|  | 723 | !isa<BranchInst>(InnerLoopLatch->getTerminator()) || | 
|  | 724 | !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 725 | LLVM_DEBUG( | 
|  | 726 | dbgs() << "Loops where the latch is not the exiting block are not" | 
|  | 727 | << " supported currently.\n"); | 
| Florian Hahn | 1da30c6 | 2018-04-25 09:35:54 +0000 | [diff] [blame] | 728 | ORE->emit([&]() { | 
|  | 729 | return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch", | 
|  | 730 | OuterLoop->getStartLoc(), | 
|  | 731 | OuterLoop->getHeader()) | 
|  | 732 | << "Loops where the latch is not the exiting block cannot be" | 
|  | 733 | " interchange currently."; | 
|  | 734 | }); | 
|  | 735 | return true; | 
|  | 736 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 737 |  | 
|  | 738 | PHINode *InnerInductionVar; | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 739 | SmallVector<PHINode *, 8> Inductions; | 
|  | 740 | SmallVector<PHINode *, 8> Reductions; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 741 | if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 742 | LLVM_DEBUG( | 
|  | 743 | dbgs() << "Only inner loops with induction or reduction PHI nodes " | 
|  | 744 | << "are supported currently.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 745 | ORE->emit([&]() { | 
|  | 746 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", | 
|  | 747 | InnerLoop->getStartLoc(), | 
|  | 748 | InnerLoop->getHeader()) | 
|  | 749 | << "Only inner loops with induction or reduction PHI nodes can be" | 
|  | 750 | " interchange currently."; | 
|  | 751 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 752 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 753 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 754 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 755 | // TODO: Currently we handle only loops with 1 induction variable. | 
|  | 756 | if (Inductions.size() != 1) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 757 | LLVM_DEBUG( | 
|  | 758 | dbgs() << "We currently only support loops with 1 induction variable." | 
|  | 759 | << "Failed to interchange due to current limitation\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 760 | ORE->emit([&]() { | 
|  | 761 | return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner", | 
|  | 762 | InnerLoop->getStartLoc(), | 
|  | 763 | InnerLoop->getHeader()) | 
|  | 764 | << "Only inner loops with 1 induction variable can be " | 
|  | 765 | "interchanged currently."; | 
|  | 766 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 767 | return true; | 
|  | 768 | } | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 769 | if (Reductions.size() > 0) | 
|  | 770 | InnerLoopHasReduction = true; | 
|  | 771 |  | 
|  | 772 | InnerInductionVar = Inductions.pop_back_val(); | 
|  | 773 | Reductions.clear(); | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 774 | if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 775 | LLVM_DEBUG( | 
|  | 776 | dbgs() << "Only outer loops with induction or reduction PHI nodes " | 
|  | 777 | << "are supported currently.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 778 | ORE->emit([&]() { | 
|  | 779 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", | 
|  | 780 | OuterLoop->getStartLoc(), | 
|  | 781 | OuterLoop->getHeader()) | 
|  | 782 | << "Only outer loops with induction or reduction PHI nodes can be" | 
|  | 783 | " interchanged currently."; | 
|  | 784 | }); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 785 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 786 | } | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 787 |  | 
|  | 788 | // Outer loop cannot have reduction because then loops will not be tightly | 
|  | 789 | // nested. | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 790 | if (!Reductions.empty()) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 791 | LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported " | 
|  | 792 | << "currently.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 793 | ORE->emit([&]() { | 
|  | 794 | return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter", | 
|  | 795 | OuterLoop->getStartLoc(), | 
|  | 796 | OuterLoop->getHeader()) | 
|  | 797 | << "Outer loops with reductions cannot be interchangeed " | 
|  | 798 | "currently."; | 
|  | 799 | }); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 800 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 801 | } | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 802 | // TODO: Currently we handle only loops with 1 induction variable. | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 803 | if (Inductions.size() != 1) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 804 | LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not " | 
|  | 805 | << "supported currently.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 806 | ORE->emit([&]() { | 
|  | 807 | return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter", | 
|  | 808 | OuterLoop->getStartLoc(), | 
|  | 809 | OuterLoop->getHeader()) | 
|  | 810 | << "Only outer loops with 1 induction variable can be " | 
|  | 811 | "interchanged currently."; | 
|  | 812 | }); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 813 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 814 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 815 |  | 
|  | 816 | // TODO: Triangular loops are not handled for now. | 
|  | 817 | if (!isLoopStructureUnderstood(InnerInductionVar)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 818 | LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 819 | ORE->emit([&]() { | 
|  | 820 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner", | 
|  | 821 | InnerLoop->getStartLoc(), | 
|  | 822 | InnerLoop->getHeader()) | 
|  | 823 | << "Inner loop structure not understood currently."; | 
|  | 824 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 825 | return true; | 
|  | 826 | } | 
|  | 827 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 828 | // TODO: We only handle LCSSA PHI's corresponding to reduction for now. | 
| Florian Hahn | 1da30c6 | 2018-04-25 09:35:54 +0000 | [diff] [blame] | 829 | BasicBlock *InnerExit = InnerLoop->getExitBlock(); | 
|  | 830 | if (!containsSafePHI(InnerExit, false)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 831 | LLVM_DEBUG( | 
|  | 832 | dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 833 | ORE->emit([&]() { | 
|  | 834 | return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner", | 
|  | 835 | InnerLoop->getStartLoc(), | 
|  | 836 | InnerLoop->getHeader()) | 
|  | 837 | << "Only inner loops with LCSSA PHIs can be interchange " | 
|  | 838 | "currently."; | 
|  | 839 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 840 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 841 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 842 |  | 
|  | 843 | // TODO: Current limitation: Since we split the inner loop latch at the point | 
|  | 844 | // were induction variable is incremented (induction.next); We cannot have | 
|  | 845 | // more than 1 user of induction.next since it would result in broken code | 
|  | 846 | // after split. | 
|  | 847 | // e.g. | 
|  | 848 | // for(i=0;i<N;i++) { | 
|  | 849 | //    for(j = 0;j<M;j++) { | 
|  | 850 | //      A[j+1][i+2] = A[j][i]+k; | 
|  | 851 | //  } | 
|  | 852 | // } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 853 | Instruction *InnerIndexVarInc = nullptr; | 
|  | 854 | if (InnerInductionVar->getIncomingBlock(0) == InnerLoopPreHeader) | 
|  | 855 | InnerIndexVarInc = | 
|  | 856 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(1)); | 
|  | 857 | else | 
|  | 858 | InnerIndexVarInc = | 
|  | 859 | dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); | 
|  | 860 |  | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 861 | if (!InnerIndexVarInc) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 862 | LLVM_DEBUG( | 
|  | 863 | dbgs() << "Did not find an instruction to increment the induction " | 
|  | 864 | << "variable.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 865 | ORE->emit([&]() { | 
|  | 866 | return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner", | 
|  | 867 | InnerLoop->getStartLoc(), | 
|  | 868 | InnerLoop->getHeader()) | 
|  | 869 | << "The inner loop does not increment the induction variable."; | 
|  | 870 | }); | 
| Pete Cooper | 11bd958 | 2015-07-27 18:37:58 +0000 | [diff] [blame] | 871 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 872 | } | 
| Pete Cooper | 11bd958 | 2015-07-27 18:37:58 +0000 | [diff] [blame] | 873 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 874 | // Since we split the inner loop latch on this induction variable. Make sure | 
|  | 875 | // we do not have any instruction between the induction variable and branch | 
|  | 876 | // instruction. | 
|  | 877 |  | 
| David Majnemer | d770877 | 2016-06-24 04:05:21 +0000 | [diff] [blame] | 878 | bool FoundInduction = false; | 
| Florian Hahn | fd2bc11 | 2018-04-26 10:26:17 +0000 | [diff] [blame] | 879 | for (const Instruction &I : | 
|  | 880 | llvm::reverse(InnerLoopLatch->instructionsWithoutDebug())) { | 
| Florian Hahn | cd78345 | 2017-08-25 16:52:29 +0000 | [diff] [blame] | 881 | if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I) || | 
|  | 882 | isa<ZExtInst>(I)) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 883 | continue; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 884 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 885 | // We found an instruction. If this is not induction variable then it is not | 
|  | 886 | // safe to split this loop latch. | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 887 | if (!I.isIdenticalTo(InnerIndexVarInc)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 888 | LLVM_DEBUG(dbgs() << "Found unsupported instructions between induction " | 
|  | 889 | << "variable increment and branch.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 890 | ORE->emit([&]() { | 
|  | 891 | return OptimizationRemarkMissed( | 
|  | 892 | DEBUG_TYPE, "UnsupportedInsBetweenInduction", | 
|  | 893 | InnerLoop->getStartLoc(), InnerLoop->getHeader()) | 
|  | 894 | << "Found unsupported instruction between induction variable " | 
|  | 895 | "increment and branch."; | 
|  | 896 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 897 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 898 | } | 
| David Majnemer | d770877 | 2016-06-24 04:05:21 +0000 | [diff] [blame] | 899 |  | 
|  | 900 | FoundInduction = true; | 
|  | 901 | break; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 902 | } | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 903 | // The loop latch ended and we didn't find the induction variable return as | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 904 | // current limitation. | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 905 | if (!FoundInduction) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 906 | LLVM_DEBUG(dbgs() << "Did not find the induction variable.\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 907 | ORE->emit([&]() { | 
|  | 908 | return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable", | 
|  | 909 | InnerLoop->getStartLoc(), | 
|  | 910 | InnerLoop->getHeader()) | 
|  | 911 | << "Did not find the induction variable."; | 
|  | 912 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 913 | return true; | 
| Florian Hahn | 4eeff39 | 2017-07-03 15:32:00 +0000 | [diff] [blame] | 914 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 915 | return false; | 
|  | 916 | } | 
|  | 917 |  | 
| Florian Hahn | f3fea0f | 2018-04-27 13:52:51 +0000 | [diff] [blame] | 918 | // We currently support LCSSA PHI nodes in the outer loop exit, if their | 
|  | 919 | // incoming values do not come from the outer loop latch or if the | 
|  | 920 | // outer loop latch has a single predecessor. In that case, the value will | 
|  | 921 | // be available if both the inner and outer loop conditions are true, which | 
|  | 922 | // will still be true after interchanging. If we have multiple predecessor, | 
|  | 923 | // that may not be the case, e.g. because the outer loop latch may be executed | 
|  | 924 | // if the inner loop is not executed. | 
|  | 925 | static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) { | 
|  | 926 | BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock(); | 
|  | 927 | for (PHINode &PHI : LoopNestExit->phis()) { | 
|  | 928 | //  FIXME: We currently are not able to detect floating point reductions | 
|  | 929 | //         and have to use floating point PHIs as a proxy to prevent | 
|  | 930 | //         interchanging in the presence of floating point reductions. | 
|  | 931 | if (PHI.getType()->isFloatingPointTy()) | 
|  | 932 | return false; | 
|  | 933 | for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) { | 
|  | 934 | Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i)); | 
|  | 935 | if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch()) | 
|  | 936 | continue; | 
|  | 937 |  | 
|  | 938 | // The incoming value is defined in the outer loop latch. Currently we | 
|  | 939 | // only support that in case the outer loop latch has a single predecessor. | 
|  | 940 | // This guarantees that the outer loop latch is executed if and only if | 
|  | 941 | // the inner loop is executed (because tightlyNested() guarantees that the | 
|  | 942 | // outer loop header only branches to the inner loop or the outer loop | 
|  | 943 | // latch). | 
|  | 944 | // FIXME: We could weaken this logic and allow multiple predecessors, | 
|  | 945 | //        if the values are produced outside the loop latch. We would need | 
|  | 946 | //        additional logic to update the PHI nodes in the exit block as | 
|  | 947 | //        well. | 
|  | 948 | if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr) | 
|  | 949 | return false; | 
|  | 950 | } | 
|  | 951 | } | 
|  | 952 | return true; | 
|  | 953 | } | 
|  | 954 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 955 | bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, | 
|  | 956 | unsigned OuterLoopId, | 
|  | 957 | CharMatrix &DepMatrix) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 958 | if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 959 | LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId | 
|  | 960 | << " and OuterLoopId = " << OuterLoopId | 
|  | 961 | << " due to dependence\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 962 | ORE->emit([&]() { | 
|  | 963 | return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence", | 
|  | 964 | InnerLoop->getStartLoc(), | 
|  | 965 | InnerLoop->getHeader()) | 
|  | 966 | << "Cannot interchange loops due to dependences."; | 
|  | 967 | }); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 968 | return false; | 
|  | 969 | } | 
| Florian Hahn | 4284049 | 2017-07-31 09:00:52 +0000 | [diff] [blame] | 970 | // Check if outer and inner loop contain legal instructions only. | 
|  | 971 | for (auto *BB : OuterLoop->blocks()) | 
| Florian Hahn | fd2bc11 | 2018-04-26 10:26:17 +0000 | [diff] [blame] | 972 | for (Instruction &I : BB->instructionsWithoutDebug()) | 
| Florian Hahn | 4284049 | 2017-07-31 09:00:52 +0000 | [diff] [blame] | 973 | if (CallInst *CI = dyn_cast<CallInst>(&I)) { | 
|  | 974 | // readnone functions do not prevent interchanging. | 
|  | 975 | if (CI->doesNotReadMemory()) | 
|  | 976 | continue; | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 977 | LLVM_DEBUG( | 
|  | 978 | dbgs() << "Loops with call instructions cannot be interchanged " | 
|  | 979 | << "safely."); | 
| Florian Hahn | 9467ccf | 2018-04-03 20:54:04 +0000 | [diff] [blame] | 980 | ORE->emit([&]() { | 
|  | 981 | return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst", | 
|  | 982 | CI->getDebugLoc(), | 
|  | 983 | CI->getParent()) | 
|  | 984 | << "Cannot interchange loops due to call instruction."; | 
|  | 985 | }); | 
|  | 986 |  | 
| Florian Hahn | 4284049 | 2017-07-31 09:00:52 +0000 | [diff] [blame] | 987 | return false; | 
|  | 988 | } | 
|  | 989 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 990 | // TODO: The loops could not be interchanged due to current limitations in the | 
|  | 991 | // transform module. | 
|  | 992 | if (currentLimitations()) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 993 | LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 994 | return false; | 
|  | 995 | } | 
|  | 996 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 997 | // Check if the loops are tightly nested. | 
|  | 998 | if (!tightlyNested(OuterLoop, InnerLoop)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 999 | LLVM_DEBUG(dbgs() << "Loops not tightly nested\n"); | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 1000 | ORE->emit([&]() { | 
|  | 1001 | return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested", | 
|  | 1002 | InnerLoop->getStartLoc(), | 
|  | 1003 | InnerLoop->getHeader()) | 
|  | 1004 | << "Cannot interchange loops because they are not tightly " | 
|  | 1005 | "nested."; | 
|  | 1006 | }); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1007 | return false; | 
|  | 1008 | } | 
|  | 1009 |  | 
| Florian Hahn | f3fea0f | 2018-04-27 13:52:51 +0000 | [diff] [blame] | 1010 | if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1011 | LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n"); | 
| Florian Hahn | f3fea0f | 2018-04-27 13:52:51 +0000 | [diff] [blame] | 1012 | ORE->emit([&]() { | 
|  | 1013 | return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI", | 
|  | 1014 | OuterLoop->getStartLoc(), | 
|  | 1015 | OuterLoop->getHeader()) | 
|  | 1016 | << "Found unsupported PHI node in loop exit."; | 
|  | 1017 | }); | 
|  | 1018 | return false; | 
|  | 1019 | } | 
|  | 1020 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1021 | return true; | 
|  | 1022 | } | 
|  | 1023 |  | 
|  | 1024 | int LoopInterchangeProfitability::getInstrOrderCost() { | 
|  | 1025 | unsigned GoodOrder, BadOrder; | 
|  | 1026 | BadOrder = GoodOrder = 0; | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 1027 | for (BasicBlock *BB : InnerLoop->blocks()) { | 
|  | 1028 | for (Instruction &Ins : *BB) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1029 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) { | 
|  | 1030 | unsigned NumOp = GEP->getNumOperands(); | 
|  | 1031 | bool FoundInnerInduction = false; | 
|  | 1032 | bool FoundOuterInduction = false; | 
|  | 1033 | for (unsigned i = 0; i < NumOp; ++i) { | 
|  | 1034 | const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i)); | 
|  | 1035 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal); | 
|  | 1036 | if (!AR) | 
|  | 1037 | continue; | 
|  | 1038 |  | 
|  | 1039 | // If we find the inner induction after an outer induction e.g. | 
|  | 1040 | // for(int i=0;i<N;i++) | 
|  | 1041 | //   for(int j=0;j<N;j++) | 
|  | 1042 | //     A[i][j] = A[i-1][j-1]+k; | 
|  | 1043 | // then it is a good order. | 
|  | 1044 | if (AR->getLoop() == InnerLoop) { | 
|  | 1045 | // We found an InnerLoop induction after OuterLoop induction. It is | 
|  | 1046 | // a good order. | 
|  | 1047 | FoundInnerInduction = true; | 
|  | 1048 | if (FoundOuterInduction) { | 
|  | 1049 | GoodOrder++; | 
|  | 1050 | break; | 
|  | 1051 | } | 
|  | 1052 | } | 
|  | 1053 | // If we find the outer induction after an inner induction e.g. | 
|  | 1054 | // for(int i=0;i<N;i++) | 
|  | 1055 | //   for(int j=0;j<N;j++) | 
|  | 1056 | //     A[j][i] = A[j-1][i-1]+k; | 
|  | 1057 | // then it is a bad order. | 
|  | 1058 | if (AR->getLoop() == OuterLoop) { | 
|  | 1059 | // We found an OuterLoop induction after InnerLoop induction. It is | 
|  | 1060 | // a bad order. | 
|  | 1061 | FoundOuterInduction = true; | 
|  | 1062 | if (FoundInnerInduction) { | 
|  | 1063 | BadOrder++; | 
|  | 1064 | break; | 
|  | 1065 | } | 
|  | 1066 | } | 
|  | 1067 | } | 
|  | 1068 | } | 
|  | 1069 | } | 
|  | 1070 | } | 
|  | 1071 | return GoodOrder - BadOrder; | 
|  | 1072 | } | 
|  | 1073 |  | 
| Chad Rosier | e6b3a63 | 2016-09-14 17:12:30 +0000 | [diff] [blame] | 1074 | static bool isProfitableForVectorization(unsigned InnerLoopId, | 
|  | 1075 | unsigned OuterLoopId, | 
|  | 1076 | CharMatrix &DepMatrix) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1077 | // TODO: Improve this heuristic to catch more cases. | 
|  | 1078 | // If the inner loop is loop independent or doesn't carry any dependency it is | 
|  | 1079 | // profitable to move this to outer position. | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 1080 | for (auto &Row : DepMatrix) { | 
|  | 1081 | if (Row[InnerLoopId] != 'S' && Row[InnerLoopId] != 'I') | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1082 | return false; | 
|  | 1083 | // TODO: We need to improve this heuristic. | 
| Florian Hahn | f66efd6 | 2017-07-24 11:41:30 +0000 | [diff] [blame] | 1084 | if (Row[OuterLoopId] != '=') | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1085 | return false; | 
|  | 1086 | } | 
|  | 1087 | // If outer loop has dependence and inner loop is loop independent then it is | 
|  | 1088 | // profitable to interchange to enable parallelism. | 
| Florian Hahn | ceee788 | 2018-04-24 16:55:32 +0000 | [diff] [blame] | 1089 | // If there are no dependences, interchanging will not improve anything. | 
|  | 1090 | return !DepMatrix.empty(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1091 | } | 
|  | 1092 |  | 
|  | 1093 | bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, | 
|  | 1094 | unsigned OuterLoopId, | 
|  | 1095 | CharMatrix &DepMatrix) { | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 1096 | // TODO: Add better profitability checks. | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1097 | // e.g | 
|  | 1098 | // 1) Construct dependency matrix and move the one with no loop carried dep | 
|  | 1099 | //    inside to enable vectorization. | 
|  | 1100 |  | 
|  | 1101 | // This is rough cost estimation algorithm. It counts the good and bad order | 
|  | 1102 | // of induction variables in the instruction and allows reordering if number | 
|  | 1103 | // of bad orders is more than good. | 
| Chad Rosier | 7243189 | 2016-09-14 17:07:13 +0000 | [diff] [blame] | 1104 | int Cost = getInstrOrderCost(); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1105 | LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n"); | 
| Chad Rosier | 7243189 | 2016-09-14 17:07:13 +0000 | [diff] [blame] | 1106 | if (Cost < -LoopInterchangeCostThreshold) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1107 | return true; | 
|  | 1108 |  | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 1109 | // It is not profitable as per current cache profitability model. But check if | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1110 | // we can move this loop outside to improve parallelism. | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 1111 | if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) | 
|  | 1112 | return true; | 
|  | 1113 |  | 
| Vivek Pandya | 9590658 | 2017-10-11 17:12:59 +0000 | [diff] [blame] | 1114 | ORE->emit([&]() { | 
|  | 1115 | return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", | 
|  | 1116 | InnerLoop->getStartLoc(), | 
|  | 1117 | InnerLoop->getHeader()) | 
|  | 1118 | << "Interchanging loops is too costly (cost=" | 
|  | 1119 | << ore::NV("Cost", Cost) << ", threshold=" | 
|  | 1120 | << ore::NV("Threshold", LoopInterchangeCostThreshold) | 
|  | 1121 | << ") and it does not improve parallelism."; | 
|  | 1122 | }); | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 1123 | return false; | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1124 | } | 
|  | 1125 |  | 
|  | 1126 | void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, | 
|  | 1127 | Loop *InnerLoop) { | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 1128 | for (Loop *L : *OuterLoop) | 
|  | 1129 | if (L == InnerLoop) { | 
|  | 1130 | OuterLoop->removeChildLoop(L); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1131 | return; | 
|  | 1132 | } | 
| Benjamin Kramer | 8ceb323 | 2015-10-25 22:28:27 +0000 | [diff] [blame] | 1133 | llvm_unreachable("Couldn't find loop"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1134 | } | 
| Daniel Jasper | 6adbd7a | 2015-03-06 10:39:14 +0000 | [diff] [blame] | 1135 |  | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1136 | /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the | 
|  | 1137 | /// new inner and outer loop after interchanging: NewInner is the original | 
|  | 1138 | /// outer loop and NewOuter is the original inner loop. | 
|  | 1139 | /// | 
|  | 1140 | /// Before interchanging, we have the following structure | 
|  | 1141 | /// Outer preheader | 
|  | 1142 | //  Outer header | 
|  | 1143 | //    Inner preheader | 
|  | 1144 | //    Inner header | 
|  | 1145 | //      Inner body | 
|  | 1146 | //      Inner latch | 
|  | 1147 | //   outer bbs | 
|  | 1148 | //   Outer latch | 
|  | 1149 | // | 
|  | 1150 | // After interchanging: | 
|  | 1151 | // Inner preheader | 
|  | 1152 | // Inner header | 
|  | 1153 | //   Outer preheader | 
|  | 1154 | //   Outer header | 
|  | 1155 | //     Inner body | 
|  | 1156 | //     outer bbs | 
|  | 1157 | //     Outer latch | 
|  | 1158 | //   Inner latch | 
|  | 1159 | void LoopInterchangeTransform::restructureLoops( | 
|  | 1160 | Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader, | 
|  | 1161 | BasicBlock *OrigOuterPreHeader) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1162 | Loop *OuterLoopParent = OuterLoop->getParentLoop(); | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1163 | // The original inner loop preheader moves from the new inner loop to | 
|  | 1164 | // the parent loop, if there is one. | 
|  | 1165 | NewInner->removeBlockFromLoop(OrigInnerPreHeader); | 
|  | 1166 | LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent); | 
|  | 1167 |  | 
|  | 1168 | // Switch the loop levels. | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1169 | if (OuterLoopParent) { | 
|  | 1170 | // Remove the loop from its parent loop. | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1171 | removeChildLoop(OuterLoopParent, NewInner); | 
|  | 1172 | removeChildLoop(NewInner, NewOuter); | 
|  | 1173 | OuterLoopParent->addChildLoop(NewOuter); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1174 | } else { | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1175 | removeChildLoop(NewInner, NewOuter); | 
|  | 1176 | LI->changeTopLevelLoop(NewInner, NewOuter); | 
|  | 1177 | } | 
|  | 1178 | while (!NewOuter->empty()) | 
|  | 1179 | NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin())); | 
|  | 1180 | NewOuter->addChildLoop(NewInner); | 
|  | 1181 |  | 
|  | 1182 | // BBs from the original inner loop. | 
|  | 1183 | SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks()); | 
|  | 1184 |  | 
|  | 1185 | // Add BBs from the original outer loop to the original inner loop (excluding | 
|  | 1186 | // BBs already in inner loop) | 
|  | 1187 | for (BasicBlock *BB : NewInner->blocks()) | 
|  | 1188 | if (LI->getLoopFor(BB) == NewInner) | 
|  | 1189 | NewOuter->addBlockEntry(BB); | 
|  | 1190 |  | 
|  | 1191 | // Now remove inner loop header and latch from the new inner loop and move | 
|  | 1192 | // other BBs (the loop body) to the new inner loop. | 
|  | 1193 | BasicBlock *OuterHeader = NewOuter->getHeader(); | 
|  | 1194 | BasicBlock *OuterLatch = NewOuter->getLoopLatch(); | 
|  | 1195 | for (BasicBlock *BB : OrigInnerBBs) { | 
| Florian Hahn | 74418185 | 2018-04-23 21:38:19 +0000 | [diff] [blame] | 1196 | // Nothing will change for BBs in child loops. | 
|  | 1197 | if (LI->getLoopFor(BB) != NewOuter) | 
|  | 1198 | continue; | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1199 | // Remove the new outer loop header and latch from the new inner loop. | 
|  | 1200 | if (BB == OuterHeader || BB == OuterLatch) | 
|  | 1201 | NewInner->removeBlockFromLoop(BB); | 
|  | 1202 | else | 
|  | 1203 | LI->changeLoopFor(BB, NewInner); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1204 | } | 
|  | 1205 |  | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1206 | // The preheader of the original outer loop becomes part of the new | 
|  | 1207 | // outer loop. | 
|  | 1208 | NewOuter->addBlockEntry(OrigOuterPreHeader); | 
|  | 1209 | LI->changeLoopFor(OrigOuterPreHeader, NewOuter); | 
| Florian Hahn | 3afb974 | 2018-09-14 07:50:20 +0000 | [diff] [blame] | 1210 |  | 
|  | 1211 | // Tell SE that we move the loops around. | 
|  | 1212 | SE->forgetLoop(NewOuter); | 
|  | 1213 | SE->forgetLoop(NewInner); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1214 | } | 
|  | 1215 |  | 
|  | 1216 | bool LoopInterchangeTransform::transform() { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1217 | bool Transformed = false; | 
|  | 1218 | Instruction *InnerIndexVar; | 
|  | 1219 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 1220 | if (InnerLoop->getSubLoops().empty()) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1221 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1222 | LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1223 | PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); | 
|  | 1224 | if (!InductionPHI) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1225 | LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1226 | return false; | 
|  | 1227 | } | 
|  | 1228 |  | 
|  | 1229 | if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) | 
|  | 1230 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(1)); | 
|  | 1231 | else | 
|  | 1232 | InnerIndexVar = dyn_cast<Instruction>(InductionPHI->getIncomingValue(0)); | 
|  | 1233 |  | 
| Florian Hahn | d8fcf0d | 2018-06-19 08:03:24 +0000 | [diff] [blame] | 1234 | // Ensure that InductionPHI is the first Phi node. | 
| David Green | 907b60f | 2017-10-21 13:58:37 +0000 | [diff] [blame] | 1235 | if (&InductionPHI->getParent()->front() != InductionPHI) | 
|  | 1236 | InductionPHI->moveBefore(&InductionPHI->getParent()->front()); | 
|  | 1237 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1238 | // Split at the place were the induction variable is | 
|  | 1239 | // incremented/decremented. | 
|  | 1240 | // TODO: This splitting logic may not work always. Fix this. | 
|  | 1241 | splitInnerLoopLatch(InnerIndexVar); | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1242 | LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1243 |  | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 1244 | // Splits the inner loops phi nodes out into a separate basic block. | 
| Florian Hahn | d8fcf0d | 2018-06-19 08:03:24 +0000 | [diff] [blame] | 1245 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); | 
|  | 1246 | SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); | 
|  | 1247 | LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1248 | } | 
|  | 1249 |  | 
|  | 1250 | Transformed |= adjustLoopLinks(); | 
|  | 1251 | if (!Transformed) { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1252 | LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n"); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1253 | return false; | 
|  | 1254 | } | 
|  | 1255 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1256 | return true; | 
|  | 1257 | } | 
|  | 1258 |  | 
| Benjamin Kramer | 7944292 | 2015-03-06 18:59:14 +0000 | [diff] [blame] | 1259 | void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1260 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1261 | BasicBlock *InnerLoopLatchPred = InnerLoopLatch; | 
| Benjamin Kramer | 7944292 | 2015-03-06 18:59:14 +0000 | [diff] [blame] | 1262 | InnerLoopLatch = SplitBlock(InnerLoopLatchPred, Inc, DT, LI); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1263 | } | 
|  | 1264 |  | 
| Florian Hahn | d8fcf0d | 2018-06-19 08:03:24 +0000 | [diff] [blame] | 1265 | /// \brief Move all instructions except the terminator from FromBB right before | 
| Benjamin Kramer | 7944292 | 2015-03-06 18:59:14 +0000 | [diff] [blame] | 1266 | /// InsertBefore | 
|  | 1267 | static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { | 
|  | 1268 | auto &ToList = InsertBefore->getParent()->getInstList(); | 
|  | 1269 | auto &FromList = FromBB->getInstList(); | 
|  | 1270 |  | 
| Duncan P. N. Exon Smith | be4d8cb | 2015-10-13 19:26:58 +0000 | [diff] [blame] | 1271 | ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), | 
|  | 1272 | FromBB->getTerminator()->getIterator()); | 
| Benjamin Kramer | 7944292 | 2015-03-06 18:59:14 +0000 | [diff] [blame] | 1273 | } | 
|  | 1274 |  | 
| Florian Hahn | 6feb637 | 2018-09-26 19:34:25 +0000 | [diff] [blame] | 1275 | static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, | 
|  | 1276 | BasicBlock *NewPred) { | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 1277 | for (PHINode &PHI : CurrBlock->phis()) { | 
|  | 1278 | unsigned Num = PHI.getNumIncomingValues(); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1279 | for (unsigned i = 0; i < Num; ++i) { | 
| Florian Hahn | 5912c66 | 2018-05-02 10:53:04 +0000 | [diff] [blame] | 1280 | if (PHI.getIncomingBlock(i) == OldPred) | 
|  | 1281 | PHI.setIncomingBlock(i, NewPred); | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1282 | } | 
|  | 1283 | } | 
|  | 1284 | } | 
|  | 1285 |  | 
| Adrian Prantl | 5f8f34e4 | 2018-05-01 15:54:18 +0000 | [diff] [blame] | 1286 | /// Update BI to jump to NewBB instead of OldBB. Records updates to | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1287 | /// the dominator tree in DTUpdates, if DT should be preserved. | 
|  | 1288 | static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB, | 
|  | 1289 | BasicBlock *NewBB, | 
|  | 1290 | std::vector<DominatorTree::UpdateType> &DTUpdates) { | 
| Chandler Carruth | 96fc1de | 2018-08-26 08:41:15 +0000 | [diff] [blame] | 1291 | assert(llvm::count_if(successors(BI), | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1292 | [OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 && | 
|  | 1293 | "BI must jump to OldBB at most once."); | 
|  | 1294 | for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) { | 
|  | 1295 | if (BI->getSuccessor(i) == OldBB) { | 
|  | 1296 | BI->setSuccessor(i, NewBB); | 
|  | 1297 |  | 
|  | 1298 | DTUpdates.push_back( | 
|  | 1299 | {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB}); | 
|  | 1300 | DTUpdates.push_back( | 
|  | 1301 | {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB}); | 
|  | 1302 | break; | 
|  | 1303 | } | 
|  | 1304 | } | 
|  | 1305 | } | 
|  | 1306 |  | 
| Florian Hahn | 6feb637 | 2018-09-26 19:34:25 +0000 | [diff] [blame] | 1307 | // Move Lcssa PHIs to the right place. | 
|  | 1308 | static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch, | 
|  | 1309 | BasicBlock *OuterLatch) { | 
|  | 1310 | SmallVector<PHINode *, 8> LcssaInnerExit; | 
|  | 1311 | for (PHINode &P : InnerExit->phis()) | 
|  | 1312 | LcssaInnerExit.push_back(&P); | 
|  | 1313 |  | 
|  | 1314 | SmallVector<PHINode *, 8> LcssaInnerLatch; | 
|  | 1315 | for (PHINode &P : InnerLatch->phis()) | 
|  | 1316 | LcssaInnerLatch.push_back(&P); | 
|  | 1317 |  | 
|  | 1318 | // Lcssa PHIs for values used outside the inner loop are in InnerExit. | 
|  | 1319 | // If a PHI node has users outside of InnerExit, it has a use outside the | 
|  | 1320 | // interchanged loop and we have to preserve it. We move these to | 
|  | 1321 | // InnerLatch, which will become the new exit block for the innermost | 
|  | 1322 | // loop after interchanging. For PHIs only used in InnerExit, we can just | 
|  | 1323 | // replace them with the incoming value. | 
|  | 1324 | for (PHINode *P : LcssaInnerExit) { | 
|  | 1325 | bool hasUsersOutside = false; | 
|  | 1326 | for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) { | 
|  | 1327 | Use &U = *UI; | 
|  | 1328 | ++UI; | 
|  | 1329 | auto *Usr = cast<Instruction>(U.getUser()); | 
|  | 1330 | if (Usr->getParent() != InnerExit) { | 
|  | 1331 | hasUsersOutside = true; | 
|  | 1332 | continue; | 
|  | 1333 | } | 
|  | 1334 | U.set(P->getIncomingValueForBlock(InnerLatch)); | 
|  | 1335 | } | 
|  | 1336 | if (hasUsersOutside) | 
|  | 1337 | P->moveBefore(InnerLatch->getFirstNonPHI()); | 
|  | 1338 | else | 
|  | 1339 | P->eraseFromParent(); | 
|  | 1340 | } | 
|  | 1341 |  | 
|  | 1342 | // If the inner loop latch contains LCSSA PHIs, those come from a child loop | 
|  | 1343 | // and we have to move them to the new inner latch. | 
|  | 1344 | for (PHINode *P : LcssaInnerLatch) | 
|  | 1345 | P->moveBefore(InnerExit->getFirstNonPHI()); | 
|  | 1346 |  | 
|  | 1347 | // Now adjust the incoming blocks for the LCSSA PHIs. | 
|  | 1348 | // For PHIs moved from Inner's exit block, we need to replace Inner's latch | 
|  | 1349 | // with the new latch. | 
|  | 1350 | updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch); | 
|  | 1351 | } | 
|  | 1352 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1353 | bool LoopInterchangeTransform::adjustLoopBranches() { | 
| Nicola Zaghen | d34e60c | 2018-05-14 12:53:11 +0000 | [diff] [blame] | 1354 | LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n"); | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1355 | std::vector<DominatorTree::UpdateType> DTUpdates; | 
|  | 1356 |  | 
| Florian Hahn | 236f6fe | 2018-09-06 09:57:27 +0000 | [diff] [blame] | 1357 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); | 
|  | 1358 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | 
|  | 1359 |  | 
|  | 1360 | assert(OuterLoopPreHeader != OuterLoop->getHeader() && | 
|  | 1361 | InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader && | 
|  | 1362 | InnerLoopPreHeader && "Guaranteed by loop-simplify form"); | 
|  | 1363 | // Ensure that both preheaders do not contain PHI nodes and have single | 
|  | 1364 | // predecessors. This allows us to move them easily. We use | 
|  | 1365 | // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing | 
|  | 1366 | // preheaders do not satisfy those conditions. | 
|  | 1367 | if (isa<PHINode>(OuterLoopPreHeader->begin()) || | 
|  | 1368 | !OuterLoopPreHeader->getUniquePredecessor()) | 
|  | 1369 | OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, DT, LI, true); | 
|  | 1370 | if (InnerLoopPreHeader == OuterLoop->getHeader()) | 
|  | 1371 | InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, DT, LI, true); | 
|  | 1372 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1373 | // Adjust the loop preheader | 
|  | 1374 | BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); | 
|  | 1375 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); | 
|  | 1376 | BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); | 
|  | 1377 | BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch(); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1378 | BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor(); | 
|  | 1379 | BasicBlock *InnerLoopLatchPredecessor = | 
|  | 1380 | InnerLoopLatch->getUniquePredecessor(); | 
|  | 1381 | BasicBlock *InnerLoopLatchSuccessor; | 
|  | 1382 | BasicBlock *OuterLoopLatchSuccessor; | 
|  | 1383 |  | 
|  | 1384 | BranchInst *OuterLoopLatchBI = | 
|  | 1385 | dyn_cast<BranchInst>(OuterLoopLatch->getTerminator()); | 
|  | 1386 | BranchInst *InnerLoopLatchBI = | 
|  | 1387 | dyn_cast<BranchInst>(InnerLoopLatch->getTerminator()); | 
|  | 1388 | BranchInst *OuterLoopHeaderBI = | 
|  | 1389 | dyn_cast<BranchInst>(OuterLoopHeader->getTerminator()); | 
|  | 1390 | BranchInst *InnerLoopHeaderBI = | 
|  | 1391 | dyn_cast<BranchInst>(InnerLoopHeader->getTerminator()); | 
|  | 1392 |  | 
|  | 1393 | if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || | 
|  | 1394 | !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || | 
|  | 1395 | !InnerLoopHeaderBI) | 
|  | 1396 | return false; | 
|  | 1397 |  | 
|  | 1398 | BranchInst *InnerLoopLatchPredecessorBI = | 
|  | 1399 | dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator()); | 
|  | 1400 | BranchInst *OuterLoopPredecessorBI = | 
|  | 1401 | dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator()); | 
|  | 1402 |  | 
|  | 1403 | if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) | 
|  | 1404 | return false; | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 1405 | BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); | 
|  | 1406 | if (!InnerLoopHeaderSuccessor) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1407 | return false; | 
|  | 1408 |  | 
|  | 1409 | // Adjust Loop Preheader and headers | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1410 | updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader, | 
|  | 1411 | InnerLoopPreHeader, DTUpdates); | 
|  | 1412 | updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates); | 
|  | 1413 | updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, | 
|  | 1414 | InnerLoopHeaderSuccessor, DTUpdates); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1415 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1416 | // Adjust reduction PHI's now that the incoming block has changed. | 
| Benjamin Kramer | df005cb | 2015-08-08 18:27:36 +0000 | [diff] [blame] | 1417 | updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1418 | OuterLoopHeader); | 
|  | 1419 |  | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1420 | updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor, | 
|  | 1421 | OuterLoopPreHeader, DTUpdates); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1422 |  | 
|  | 1423 | // -------------Adjust loop latches----------- | 
|  | 1424 | if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader) | 
|  | 1425 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1); | 
|  | 1426 | else | 
|  | 1427 | InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0); | 
|  | 1428 |  | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1429 | updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, | 
|  | 1430 | InnerLoopLatchSuccessor, DTUpdates); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1431 |  | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1432 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1433 | if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) | 
|  | 1434 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); | 
|  | 1435 | else | 
|  | 1436 | OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0); | 
|  | 1437 |  | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1438 | updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor, | 
|  | 1439 | OuterLoopLatchSuccessor, DTUpdates); | 
|  | 1440 | updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch, | 
|  | 1441 | DTUpdates); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1442 |  | 
| Florian Hahn | c6296fe | 2018-02-14 13:13:15 +0000 | [diff] [blame] | 1443 | DT->applyUpdates(DTUpdates); | 
| Florian Hahn | 831a757 | 2018-04-05 09:48:45 +0000 | [diff] [blame] | 1444 | restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader, | 
|  | 1445 | OuterLoopPreHeader); | 
|  | 1446 |  | 
| Florian Hahn | 6feb637 | 2018-09-26 19:34:25 +0000 | [diff] [blame] | 1447 | moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch); | 
|  | 1448 | // For PHIs in the exit block of the outer loop, outer's latch has been | 
|  | 1449 | // replaced by Inners'. | 
|  | 1450 | updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); | 
|  | 1451 |  | 
| Florian Hahn | d8fcf0d | 2018-06-19 08:03:24 +0000 | [diff] [blame] | 1452 | // Now update the reduction PHIs in the inner and outer loop headers. | 
|  | 1453 | SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs; | 
|  | 1454 | for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1)) | 
|  | 1455 | InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); | 
|  | 1456 | for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1)) | 
|  | 1457 | OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); | 
|  | 1458 |  | 
|  | 1459 | for (PHINode *PHI : OuterLoopPHIs) | 
|  | 1460 | PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); | 
|  | 1461 |  | 
|  | 1462 | // Move the PHI nodes from the inner loop header to the outer loop header. | 
|  | 1463 | // We have to deal with one kind of PHI nodes: | 
|  | 1464 | //  1) PHI nodes that are part of inner loop-only reductions. | 
|  | 1465 | // We only have to move the PHI node and update the incoming blocks. | 
|  | 1466 | for (PHINode *PHI : InnerLoopPHIs) { | 
|  | 1467 | PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); | 
|  | 1468 | for (BasicBlock *InBB : PHI->blocks()) { | 
|  | 1469 | if (InnerLoop->contains(InBB)) | 
|  | 1470 | continue; | 
|  | 1471 |  | 
|  | 1472 | assert(!isa<PHINode>(PHI->getIncomingValueForBlock(InBB)) && | 
|  | 1473 | "Unexpected incoming PHI node, reductions in outer loop are not " | 
|  | 1474 | "supported yet"); | 
|  | 1475 | PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB)); | 
|  | 1476 | PHI->eraseFromParent(); | 
|  | 1477 | break; | 
|  | 1478 | } | 
|  | 1479 | } | 
|  | 1480 |  | 
|  | 1481 | // Update the incoming blocks for moved PHI nodes. | 
|  | 1482 | updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader); | 
|  | 1483 | updateIncomingBlock(OuterLoopHeader, InnerLoopLatch, OuterLoopLatch); | 
|  | 1484 | updateIncomingBlock(InnerLoopHeader, OuterLoopPreHeader, InnerLoopPreHeader); | 
|  | 1485 | updateIncomingBlock(InnerLoopHeader, OuterLoopLatch, InnerLoopLatch); | 
|  | 1486 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1487 | return true; | 
|  | 1488 | } | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1489 |  | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 1490 | void LoopInterchangeTransform::adjustLoopPreheaders() { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1491 | // We have interchanged the preheaders so we need to interchange the data in | 
|  | 1492 | // the preheader as well. | 
|  | 1493 | // This is because the content of inner preheader was previously executed | 
|  | 1494 | // inside the outer loop. | 
|  | 1495 | BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader(); | 
|  | 1496 | BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); | 
|  | 1497 | BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); | 
|  | 1498 | BranchInst *InnerTermBI = | 
|  | 1499 | cast<BranchInst>(InnerLoopPreHeader->getTerminator()); | 
|  | 1500 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1501 | // These instructions should now be executed inside the loop. | 
|  | 1502 | // Move instruction into a new block after outer header. | 
| Karthik Bhat | 8210fdf | 2015-04-23 04:51:44 +0000 | [diff] [blame] | 1503 | moveBBContents(InnerLoopPreHeader, OuterLoopHeader->getTerminator()); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1504 | // These instructions were not executed previously in the loop so move them to | 
|  | 1505 | // the older inner loop preheader. | 
| Benjamin Kramer | 7944292 | 2015-03-06 18:59:14 +0000 | [diff] [blame] | 1506 | moveBBContents(OuterLoopPreHeader, InnerTermBI); | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1507 | } | 
|  | 1508 |  | 
|  | 1509 | bool LoopInterchangeTransform::adjustLoopLinks() { | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1510 | // Adjust all branches in the inner and outer loop. | 
|  | 1511 | bool Changed = adjustLoopBranches(); | 
|  | 1512 | if (Changed) | 
|  | 1513 | adjustLoopPreheaders(); | 
|  | 1514 | return Changed; | 
|  | 1515 | } | 
|  | 1516 |  | 
|  | 1517 | char LoopInterchange::ID = 0; | 
| Eugene Zelenko | dd40f5e | 2017-10-16 21:34:24 +0000 | [diff] [blame] | 1518 |  | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1519 | INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", | 
|  | 1520 | "Interchanges loops for cache reuse", false, false) | 
| Florian Hahn | 8600fee | 2018-10-01 09:59:48 +0000 | [diff] [blame] | 1521 | INITIALIZE_PASS_DEPENDENCY(LoopPass) | 
| Chandler Carruth | 49c2219 | 2016-05-12 22:19:39 +0000 | [diff] [blame] | 1522 | INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) | 
| Florian Hahn | ad99352 | 2017-07-15 13:13:19 +0000 | [diff] [blame] | 1523 | INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) | 
| Karthik Bhat | 88db86d | 2015-03-06 10:11:25 +0000 | [diff] [blame] | 1524 |  | 
|  | 1525 | INITIALIZE_PASS_END(LoopInterchange, "loop-interchange", | 
|  | 1526 | "Interchanges loops for cache reuse", false, false) | 
|  | 1527 |  | 
|  | 1528 | Pass *llvm::createLoopInterchangePass() { return new LoopInterchange(); } |