David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 1 | //===----------------- LoopRotationUtils.cpp -----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file provides utilities to convert a loop into a loop with bottom test. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "llvm/Transforms/Utils/LoopRotationUtils.h" |
| 15 | #include "llvm/ADT/Statistic.h" |
| 16 | #include "llvm/Analysis/AliasAnalysis.h" |
| 17 | #include "llvm/Analysis/AssumptionCache.h" |
| 18 | #include "llvm/Analysis/BasicAliasAnalysis.h" |
| 19 | #include "llvm/Analysis/CodeMetrics.h" |
| 20 | #include "llvm/Analysis/GlobalsModRef.h" |
| 21 | #include "llvm/Analysis/InstructionSimplify.h" |
| 22 | #include "llvm/Analysis/LoopPass.h" |
| 23 | #include "llvm/Analysis/ScalarEvolution.h" |
| 24 | #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" |
| 25 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 26 | #include "llvm/Analysis/Utils/Local.h" |
| 27 | #include "llvm/Analysis/ValueTracking.h" |
| 28 | #include "llvm/IR/CFG.h" |
| 29 | #include "llvm/IR/DebugInfoMetadata.h" |
| 30 | #include "llvm/IR/Dominators.h" |
| 31 | #include "llvm/IR/Function.h" |
| 32 | #include "llvm/IR/IntrinsicInst.h" |
| 33 | #include "llvm/IR/Module.h" |
| 34 | #include "llvm/Support/CommandLine.h" |
| 35 | #include "llvm/Support/Debug.h" |
| 36 | #include "llvm/Support/raw_ostream.h" |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 37 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 38 | #include "llvm/Transforms/Utils/LoopUtils.h" |
| 39 | #include "llvm/Transforms/Utils/SSAUpdater.h" |
| 40 | #include "llvm/Transforms/Utils/ValueMapper.h" |
| 41 | using namespace llvm; |
| 42 | |
| 43 | #define DEBUG_TYPE "loop-rotate" |
| 44 | |
| 45 | STATISTIC(NumRotated, "Number of loops rotated"); |
| 46 | |
| 47 | namespace { |
| 48 | /// A simple loop rotation transformation. |
| 49 | class LoopRotate { |
| 50 | const unsigned MaxHeaderSize; |
| 51 | LoopInfo *LI; |
| 52 | const TargetTransformInfo *TTI; |
| 53 | AssumptionCache *AC; |
| 54 | DominatorTree *DT; |
| 55 | ScalarEvolution *SE; |
| 56 | const SimplifyQuery &SQ; |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 57 | bool RotationOnly; |
| 58 | bool IsUtilMode; |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 59 | |
| 60 | public: |
| 61 | LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, |
| 62 | const TargetTransformInfo *TTI, AssumptionCache *AC, |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 63 | DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ, |
| 64 | bool RotationOnly, bool IsUtilMode) |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 65 | : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 66 | SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {} |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 67 | bool processLoop(Loop *L); |
| 68 | |
| 69 | private: |
| 70 | bool rotateLoop(Loop *L, bool SimplifiedLatch); |
| 71 | bool simplifyLoopLatch(Loop *L); |
| 72 | }; |
| 73 | } // end anonymous namespace |
| 74 | |
| 75 | /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the |
| 76 | /// old header into the preheader. If there were uses of the values produced by |
| 77 | /// these instruction that were outside of the loop, we have to insert PHI nodes |
| 78 | /// to merge the two values. Do this now. |
| 79 | static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, |
| 80 | BasicBlock *OrigPreheader, |
| 81 | ValueToValueMapTy &ValueMap, |
| 82 | SmallVectorImpl<PHINode*> *InsertedPHIs) { |
| 83 | // Remove PHI node entries that are no longer live. |
| 84 | BasicBlock::iterator I, E = OrigHeader->end(); |
| 85 | for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) |
| 86 | PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); |
| 87 | |
| 88 | // Now fix up users of the instructions in OrigHeader, inserting PHI nodes |
| 89 | // as necessary. |
| 90 | SSAUpdater SSA(InsertedPHIs); |
| 91 | for (I = OrigHeader->begin(); I != E; ++I) { |
| 92 | Value *OrigHeaderVal = &*I; |
| 93 | |
| 94 | // If there are no uses of the value (e.g. because it returns void), there |
| 95 | // is nothing to rewrite. |
| 96 | if (OrigHeaderVal->use_empty()) |
| 97 | continue; |
| 98 | |
| 99 | Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal); |
| 100 | |
| 101 | // The value now exits in two versions: the initial value in the preheader |
| 102 | // and the loop "next" value in the original header. |
| 103 | SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); |
| 104 | SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); |
| 105 | SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); |
| 106 | |
| 107 | // Visit each use of the OrigHeader instruction. |
| 108 | for (Value::use_iterator UI = OrigHeaderVal->use_begin(), |
| 109 | UE = OrigHeaderVal->use_end(); |
| 110 | UI != UE;) { |
| 111 | // Grab the use before incrementing the iterator. |
| 112 | Use &U = *UI; |
| 113 | |
| 114 | // Increment the iterator before removing the use from the list. |
| 115 | ++UI; |
| 116 | |
| 117 | // SSAUpdater can't handle a non-PHI use in the same block as an |
| 118 | // earlier def. We can easily handle those cases manually. |
| 119 | Instruction *UserInst = cast<Instruction>(U.getUser()); |
| 120 | if (!isa<PHINode>(UserInst)) { |
| 121 | BasicBlock *UserBB = UserInst->getParent(); |
| 122 | |
| 123 | // The original users in the OrigHeader are already using the |
| 124 | // original definitions. |
| 125 | if (UserBB == OrigHeader) |
| 126 | continue; |
| 127 | |
| 128 | // Users in the OrigPreHeader need to use the value to which the |
| 129 | // original definitions are mapped. |
| 130 | if (UserBB == OrigPreheader) { |
| 131 | U = OrigPreHeaderVal; |
| 132 | continue; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | // Anything else can be handled by SSAUpdater. |
| 137 | SSA.RewriteUse(U); |
| 138 | } |
| 139 | |
| 140 | // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug |
| 141 | // intrinsics. |
| 142 | SmallVector<DbgValueInst *, 1> DbgValues; |
| 143 | llvm::findDbgValues(DbgValues, OrigHeaderVal); |
| 144 | for (auto &DbgValue : DbgValues) { |
| 145 | // The original users in the OrigHeader are already using the original |
| 146 | // definitions. |
| 147 | BasicBlock *UserBB = DbgValue->getParent(); |
| 148 | if (UserBB == OrigHeader) |
| 149 | continue; |
| 150 | |
| 151 | // Users in the OrigPreHeader need to use the value to which the |
| 152 | // original definitions are mapped and anything else can be handled by |
| 153 | // the SSAUpdater. To avoid adding PHINodes, check if the value is |
| 154 | // available in UserBB, if not substitute undef. |
| 155 | Value *NewVal; |
| 156 | if (UserBB == OrigPreheader) |
| 157 | NewVal = OrigPreHeaderVal; |
| 158 | else if (SSA.HasValueForBlock(UserBB)) |
| 159 | NewVal = SSA.GetValueInMiddleOfBlock(UserBB); |
| 160 | else |
| 161 | NewVal = UndefValue::get(OrigHeaderVal->getType()); |
| 162 | DbgValue->setOperand(0, |
| 163 | MetadataAsValue::get(OrigHeaderVal->getContext(), |
| 164 | ValueAsMetadata::get(NewVal))); |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | |
David Green | f80ebc8 | 2018-04-01 12:48:24 +0000 | [diff] [blame] | 169 | // Look for a phi which is only used outside the loop (via a LCSSA phi) |
| 170 | // in the exit from the header. This means that rotating the loop can |
| 171 | // remove the phi. |
| 172 | static bool shouldRotateLoopExitingLatch(Loop *L) { |
| 173 | BasicBlock *Header = L->getHeader(); |
| 174 | BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0); |
| 175 | if (L->contains(HeaderExit)) |
| 176 | HeaderExit = Header->getTerminator()->getSuccessor(1); |
| 177 | |
| 178 | for (auto &Phi : Header->phis()) { |
| 179 | // Look for uses of this phi in the loop/via exits other than the header. |
| 180 | if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) { |
| 181 | return cast<Instruction>(U)->getParent() != HeaderExit; |
| 182 | })) |
| 183 | continue; |
| 184 | return true; |
| 185 | } |
| 186 | |
| 187 | return false; |
| 188 | } |
| 189 | |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 190 | /// Rotate loop LP. Return true if the loop is rotated. |
| 191 | /// |
| 192 | /// \param SimplifiedLatch is true if the latch was just folded into the final |
| 193 | /// loop exit. In this case we may want to rotate even though the new latch is |
| 194 | /// now an exiting branch. This rotation would have happened had the latch not |
| 195 | /// been simplified. However, if SimplifiedLatch is false, then we avoid |
| 196 | /// rotating loops in which the latch exits to avoid excessive or endless |
| 197 | /// rotation. LoopRotate should be repeatable and converge to a canonical |
| 198 | /// form. This property is satisfied because simplifying the loop latch can only |
| 199 | /// happen once across multiple invocations of the LoopRotate pass. |
| 200 | bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { |
| 201 | // If the loop has only one block then there is not much to rotate. |
| 202 | if (L->getBlocks().size() == 1) |
| 203 | return false; |
| 204 | |
| 205 | BasicBlock *OrigHeader = L->getHeader(); |
| 206 | BasicBlock *OrigLatch = L->getLoopLatch(); |
| 207 | |
| 208 | BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); |
| 209 | if (!BI || BI->isUnconditional()) |
| 210 | return false; |
| 211 | |
| 212 | // If the loop header is not one of the loop exiting blocks then |
| 213 | // either this loop is already rotated or it is not |
| 214 | // suitable for loop rotation transformations. |
| 215 | if (!L->isLoopExiting(OrigHeader)) |
| 216 | return false; |
| 217 | |
| 218 | // If the loop latch already contains a branch that leaves the loop then the |
| 219 | // loop is already rotated. |
| 220 | if (!OrigLatch) |
| 221 | return false; |
| 222 | |
| 223 | // Rotate if either the loop latch does *not* exit the loop, or if the loop |
David Green | f80ebc8 | 2018-04-01 12:48:24 +0000 | [diff] [blame] | 224 | // latch was just simplified. Or if we think it will be profitable. |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 225 | if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && |
David Green | f80ebc8 | 2018-04-01 12:48:24 +0000 | [diff] [blame] | 226 | !shouldRotateLoopExitingLatch(L)) |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 227 | return false; |
| 228 | |
| 229 | // Check size of original header and reject loop if it is very big or we can't |
| 230 | // duplicate blocks inside it. |
| 231 | { |
| 232 | SmallPtrSet<const Value *, 32> EphValues; |
| 233 | CodeMetrics::collectEphemeralValues(L, AC, EphValues); |
| 234 | |
| 235 | CodeMetrics Metrics; |
| 236 | Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); |
| 237 | if (Metrics.notDuplicatable) { |
| 238 | DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" |
| 239 | << " instructions: "; |
| 240 | L->dump()); |
| 241 | return false; |
| 242 | } |
| 243 | if (Metrics.convergent) { |
| 244 | DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " |
| 245 | "instructions: "; |
| 246 | L->dump()); |
| 247 | return false; |
| 248 | } |
| 249 | if (Metrics.NumInsts > MaxHeaderSize) |
| 250 | return false; |
| 251 | } |
| 252 | |
| 253 | // Now, this loop is suitable for rotation. |
| 254 | BasicBlock *OrigPreheader = L->getLoopPreheader(); |
| 255 | |
| 256 | // If the loop could not be converted to canonical form, it must have an |
| 257 | // indirectbr in it, just give up. |
| 258 | if (!OrigPreheader || !L->hasDedicatedExits()) |
| 259 | return false; |
| 260 | |
| 261 | // Anything ScalarEvolution may know about this loop or the PHI nodes |
Max Kazantsev | 5a0a40b | 2018-04-24 02:08:05 +0000 | [diff] [blame] | 262 | // in its header will soon be invalidated. We should also invalidate |
| 263 | // all outer loops because insertion and deletion of blocks that happens |
| 264 | // during the rotation may violate invariants related to backedge taken |
| 265 | // infos in them. |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 266 | if (SE) |
Max Kazantsev | 91f4816 | 2018-04-23 12:33:31 +0000 | [diff] [blame] | 267 | SE->forgetTopmostLoop(L); |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 268 | |
| 269 | DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); |
| 270 | |
| 271 | // Find new Loop header. NewHeader is a Header's one and only successor |
| 272 | // that is inside loop. Header's other successor is outside the |
| 273 | // loop. Otherwise loop is not suitable for rotation. |
| 274 | BasicBlock *Exit = BI->getSuccessor(0); |
| 275 | BasicBlock *NewHeader = BI->getSuccessor(1); |
| 276 | if (L->contains(Exit)) |
| 277 | std::swap(Exit, NewHeader); |
| 278 | assert(NewHeader && "Unable to determine new loop header"); |
| 279 | assert(L->contains(NewHeader) && !L->contains(Exit) && |
| 280 | "Unable to determine loop header and exit blocks"); |
| 281 | |
| 282 | // This code assumes that the new header has exactly one predecessor. |
| 283 | // Remove any single-entry PHI nodes in it. |
| 284 | assert(NewHeader->getSinglePredecessor() && |
| 285 | "New header doesn't have one pred!"); |
| 286 | FoldSingleEntryPHINodes(NewHeader); |
| 287 | |
| 288 | // Begin by walking OrigHeader and populating ValueMap with an entry for |
| 289 | // each Instruction. |
| 290 | BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); |
| 291 | ValueToValueMapTy ValueMap; |
| 292 | |
| 293 | // For PHI nodes, the value available in OldPreHeader is just the |
| 294 | // incoming value from OldPreHeader. |
| 295 | for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) |
| 296 | ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); |
| 297 | |
| 298 | // For the rest of the instructions, either hoist to the OrigPreheader if |
| 299 | // possible or create a clone in the OldPreHeader if not. |
| 300 | TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); |
| 301 | |
| 302 | // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication. |
| 303 | using DbgIntrinsicHash = |
| 304 | std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>; |
| 305 | auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash { |
| 306 | return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()}; |
| 307 | }; |
| 308 | SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; |
| 309 | for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend(); |
| 310 | I != E; ++I) { |
| 311 | if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I)) |
| 312 | DbgIntrinsics.insert(makeHash(DII)); |
| 313 | else |
| 314 | break; |
| 315 | } |
| 316 | |
| 317 | while (I != E) { |
| 318 | Instruction *Inst = &*I++; |
| 319 | |
| 320 | // If the instruction's operands are invariant and it doesn't read or write |
| 321 | // memory, then it is safe to hoist. Doing this doesn't change the order of |
| 322 | // execution in the preheader, but does prevent the instruction from |
| 323 | // executing in each iteration of the loop. This means it is safe to hoist |
| 324 | // something that might trap, but isn't safe to hoist something that reads |
| 325 | // memory (without proving that the loop doesn't write). |
| 326 | if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && |
| 327 | !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) && |
| 328 | !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) { |
| 329 | Inst->moveBefore(LoopEntryBranch); |
| 330 | continue; |
| 331 | } |
| 332 | |
| 333 | // Otherwise, create a duplicate of the instruction. |
| 334 | Instruction *C = Inst->clone(); |
| 335 | |
| 336 | // Eagerly remap the operands of the instruction. |
| 337 | RemapInstruction(C, ValueMap, |
| 338 | RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); |
| 339 | |
| 340 | // Avoid inserting the same intrinsic twice. |
| 341 | if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C)) |
| 342 | if (DbgIntrinsics.count(makeHash(DII))) { |
| 343 | C->deleteValue(); |
| 344 | continue; |
| 345 | } |
| 346 | |
| 347 | // With the operands remapped, see if the instruction constant folds or is |
| 348 | // otherwise simplifyable. This commonly occurs because the entry from PHI |
| 349 | // nodes allows icmps and other instructions to fold. |
| 350 | Value *V = SimplifyInstruction(C, SQ); |
| 351 | if (V && LI->replacementPreservesLCSSAForm(C, V)) { |
| 352 | // If so, then delete the temporary instruction and stick the folded value |
| 353 | // in the map. |
| 354 | ValueMap[Inst] = V; |
| 355 | if (!C->mayHaveSideEffects()) { |
| 356 | C->deleteValue(); |
| 357 | C = nullptr; |
| 358 | } |
| 359 | } else { |
| 360 | ValueMap[Inst] = C; |
| 361 | } |
| 362 | if (C) { |
| 363 | // Otherwise, stick the new instruction into the new block! |
| 364 | C->setName(Inst->getName()); |
| 365 | C->insertBefore(LoopEntryBranch); |
| 366 | |
| 367 | if (auto *II = dyn_cast<IntrinsicInst>(C)) |
| 368 | if (II->getIntrinsicID() == Intrinsic::assume) |
| 369 | AC->registerAssumption(II); |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | // Along with all the other instructions, we just cloned OrigHeader's |
| 374 | // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's |
| 375 | // successors by duplicating their incoming values for OrigHeader. |
| 376 | TerminatorInst *TI = OrigHeader->getTerminator(); |
| 377 | for (BasicBlock *SuccBB : TI->successors()) |
| 378 | for (BasicBlock::iterator BI = SuccBB->begin(); |
| 379 | PHINode *PN = dyn_cast<PHINode>(BI); ++BI) |
| 380 | PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); |
| 381 | |
| 382 | // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove |
| 383 | // OrigPreHeader's old terminator (the original branch into the loop), and |
| 384 | // remove the corresponding incoming values from the PHI nodes in OrigHeader. |
| 385 | LoopEntryBranch->eraseFromParent(); |
| 386 | |
| 387 | |
| 388 | SmallVector<PHINode*, 2> InsertedPHIs; |
| 389 | // If there were any uses of instructions in the duplicated block outside the |
| 390 | // loop, update them, inserting PHI nodes as required |
| 391 | RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, |
| 392 | &InsertedPHIs); |
| 393 | |
| 394 | // Attach dbg.value intrinsics to the new phis if that phi uses a value that |
| 395 | // previously had debug metadata attached. This keeps the debug info |
| 396 | // up-to-date in the loop body. |
| 397 | if (!InsertedPHIs.empty()) |
| 398 | insertDebugValuesForPHIs(OrigHeader, InsertedPHIs); |
| 399 | |
| 400 | // NewHeader is now the header of the loop. |
| 401 | L->moveToHeader(NewHeader); |
| 402 | assert(L->getHeader() == NewHeader && "Latch block is our new header"); |
| 403 | |
| 404 | // Inform DT about changes to the CFG. |
| 405 | if (DT) { |
| 406 | // The OrigPreheader branches to the NewHeader and Exit now. Then, inform |
| 407 | // the DT about the removed edge to the OrigHeader (that got removed). |
| 408 | SmallVector<DominatorTree::UpdateType, 3> Updates; |
| 409 | Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); |
| 410 | Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); |
| 411 | Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); |
| 412 | DT->applyUpdates(Updates); |
| 413 | } |
| 414 | |
| 415 | // At this point, we've finished our major CFG changes. As part of cloning |
| 416 | // the loop into the preheader we've simplified instructions and the |
| 417 | // duplicated conditional branch may now be branching on a constant. If it is |
| 418 | // branching on a constant and if that constant means that we enter the loop, |
| 419 | // then we fold away the cond branch to an uncond branch. This simplifies the |
| 420 | // loop in cases important for nested loops, and it also means we don't have |
| 421 | // to split as many edges. |
| 422 | BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); |
| 423 | assert(PHBI->isConditional() && "Should be clone of BI condbr!"); |
| 424 | if (!isa<ConstantInt>(PHBI->getCondition()) || |
| 425 | PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != |
| 426 | NewHeader) { |
| 427 | // The conditional branch can't be folded, handle the general case. |
| 428 | // Split edges as necessary to preserve LoopSimplify form. |
| 429 | |
| 430 | // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and |
| 431 | // thus is not a preheader anymore. |
| 432 | // Split the edge to form a real preheader. |
| 433 | BasicBlock *NewPH = SplitCriticalEdge( |
| 434 | OrigPreheader, NewHeader, |
| 435 | CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); |
| 436 | NewPH->setName(NewHeader->getName() + ".lr.ph"); |
| 437 | |
| 438 | // Preserve canonical loop form, which means that 'Exit' should have only |
| 439 | // one predecessor. Note that Exit could be an exit block for multiple |
| 440 | // nested loops, causing both of the edges to now be critical and need to |
| 441 | // be split. |
| 442 | SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit)); |
| 443 | bool SplitLatchEdge = false; |
| 444 | for (BasicBlock *ExitPred : ExitPreds) { |
| 445 | // We only need to split loop exit edges. |
| 446 | Loop *PredLoop = LI->getLoopFor(ExitPred); |
| 447 | if (!PredLoop || PredLoop->contains(Exit)) |
| 448 | continue; |
| 449 | if (isa<IndirectBrInst>(ExitPred->getTerminator())) |
| 450 | continue; |
| 451 | SplitLatchEdge |= L->getLoopLatch() == ExitPred; |
| 452 | BasicBlock *ExitSplit = SplitCriticalEdge( |
| 453 | ExitPred, Exit, |
| 454 | CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); |
| 455 | ExitSplit->moveBefore(Exit); |
| 456 | } |
| 457 | assert(SplitLatchEdge && |
| 458 | "Despite splitting all preds, failed to split latch exit?"); |
| 459 | } else { |
| 460 | // We can fold the conditional branch in the preheader, this makes things |
| 461 | // simpler. The first step is to remove the extra edge to the Exit block. |
| 462 | Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); |
| 463 | BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); |
| 464 | NewBI->setDebugLoc(PHBI->getDebugLoc()); |
| 465 | PHBI->eraseFromParent(); |
| 466 | |
| 467 | // With our CFG finalized, update DomTree if it is available. |
| 468 | if (DT) DT->deleteEdge(OrigPreheader, Exit); |
| 469 | } |
| 470 | |
| 471 | assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); |
| 472 | assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); |
| 473 | |
| 474 | // Now that the CFG and DomTree are in a consistent state again, try to merge |
| 475 | // the OrigHeader block into OrigLatch. This will succeed if they are |
| 476 | // connected by an unconditional branch. This is just a cleanup so the |
| 477 | // emitted code isn't too gross in this common case. |
| 478 | MergeBlockIntoPredecessor(OrigHeader, DT, LI); |
| 479 | |
| 480 | DEBUG(dbgs() << "LoopRotation: into "; L->dump()); |
| 481 | |
| 482 | ++NumRotated; |
| 483 | return true; |
| 484 | } |
| 485 | |
| 486 | /// Determine whether the instructions in this range may be safely and cheaply |
| 487 | /// speculated. This is not an important enough situation to develop complex |
| 488 | /// heuristics. We handle a single arithmetic instruction along with any type |
| 489 | /// conversions. |
| 490 | static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, |
| 491 | BasicBlock::iterator End, Loop *L) { |
| 492 | bool seenIncrement = false; |
| 493 | bool MultiExitLoop = false; |
| 494 | |
| 495 | if (!L->getExitingBlock()) |
| 496 | MultiExitLoop = true; |
| 497 | |
| 498 | for (BasicBlock::iterator I = Begin; I != End; ++I) { |
| 499 | |
| 500 | if (!isSafeToSpeculativelyExecute(&*I)) |
| 501 | return false; |
| 502 | |
| 503 | if (isa<DbgInfoIntrinsic>(I)) |
| 504 | continue; |
| 505 | |
| 506 | switch (I->getOpcode()) { |
| 507 | default: |
| 508 | return false; |
| 509 | case Instruction::GetElementPtr: |
| 510 | // GEPs are cheap if all indices are constant. |
| 511 | if (!cast<GEPOperator>(I)->hasAllConstantIndices()) |
| 512 | return false; |
| 513 | // fall-thru to increment case |
| 514 | LLVM_FALLTHROUGH; |
| 515 | case Instruction::Add: |
| 516 | case Instruction::Sub: |
| 517 | case Instruction::And: |
| 518 | case Instruction::Or: |
| 519 | case Instruction::Xor: |
| 520 | case Instruction::Shl: |
| 521 | case Instruction::LShr: |
| 522 | case Instruction::AShr: { |
| 523 | Value *IVOpnd = |
| 524 | !isa<Constant>(I->getOperand(0)) |
| 525 | ? I->getOperand(0) |
| 526 | : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; |
| 527 | if (!IVOpnd) |
| 528 | return false; |
| 529 | |
| 530 | // If increment operand is used outside of the loop, this speculation |
| 531 | // could cause extra live range interference. |
| 532 | if (MultiExitLoop) { |
| 533 | for (User *UseI : IVOpnd->users()) { |
| 534 | auto *UserInst = cast<Instruction>(UseI); |
| 535 | if (!L->contains(UserInst)) |
| 536 | return false; |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | if (seenIncrement) |
| 541 | return false; |
| 542 | seenIncrement = true; |
| 543 | break; |
| 544 | } |
| 545 | case Instruction::Trunc: |
| 546 | case Instruction::ZExt: |
| 547 | case Instruction::SExt: |
| 548 | // ignore type conversions |
| 549 | break; |
| 550 | } |
| 551 | } |
| 552 | return true; |
| 553 | } |
| 554 | |
| 555 | /// Fold the loop tail into the loop exit by speculating the loop tail |
| 556 | /// instructions. Typically, this is a single post-increment. In the case of a |
| 557 | /// simple 2-block loop, hoisting the increment can be much better than |
| 558 | /// duplicating the entire loop header. In the case of loops with early exits, |
| 559 | /// rotation will not work anyway, but simplifyLoopLatch will put the loop in |
| 560 | /// canonical form so downstream passes can handle it. |
| 561 | /// |
| 562 | /// I don't believe this invalidates SCEV. |
| 563 | bool LoopRotate::simplifyLoopLatch(Loop *L) { |
| 564 | BasicBlock *Latch = L->getLoopLatch(); |
| 565 | if (!Latch || Latch->hasAddressTaken()) |
| 566 | return false; |
| 567 | |
| 568 | BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); |
| 569 | if (!Jmp || !Jmp->isUnconditional()) |
| 570 | return false; |
| 571 | |
| 572 | BasicBlock *LastExit = Latch->getSinglePredecessor(); |
| 573 | if (!LastExit || !L->isLoopExiting(LastExit)) |
| 574 | return false; |
| 575 | |
| 576 | BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); |
| 577 | if (!BI) |
| 578 | return false; |
| 579 | |
| 580 | if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) |
| 581 | return false; |
| 582 | |
| 583 | DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " |
| 584 | << LastExit->getName() << "\n"); |
| 585 | |
| 586 | // Hoist the instructions from Latch into LastExit. |
| 587 | LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), |
| 588 | Latch->begin(), Jmp->getIterator()); |
| 589 | |
| 590 | unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; |
| 591 | BasicBlock *Header = Jmp->getSuccessor(0); |
| 592 | assert(Header == L->getHeader() && "expected a backward branch"); |
| 593 | |
| 594 | // Remove Latch from the CFG so that LastExit becomes the new Latch. |
| 595 | BI->setSuccessor(FallThruPath, Header); |
| 596 | Latch->replaceSuccessorsPhiUsesWith(LastExit); |
| 597 | Jmp->eraseFromParent(); |
| 598 | |
| 599 | // Nuke the Latch block. |
| 600 | assert(Latch->empty() && "unable to evacuate Latch"); |
| 601 | LI->removeBlock(Latch); |
| 602 | if (DT) |
| 603 | DT->eraseNode(Latch); |
| 604 | Latch->eraseFromParent(); |
| 605 | return true; |
| 606 | } |
| 607 | |
| 608 | /// Rotate \c L, and return true if any modification was made. |
| 609 | bool LoopRotate::processLoop(Loop *L) { |
| 610 | // Save the loop metadata. |
| 611 | MDNode *LoopMD = L->getLoopID(); |
| 612 | |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 613 | bool SimplifiedLatch = false; |
| 614 | |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 615 | // Simplify the loop latch before attempting to rotate the header |
| 616 | // upward. Rotation may not be needed if the loop tail can be folded into the |
| 617 | // loop exit. |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 618 | if (!RotationOnly) |
| 619 | SimplifiedLatch = simplifyLoopLatch(L); |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 620 | |
| 621 | bool MadeChange = rotateLoop(L, SimplifiedLatch); |
| 622 | assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && |
| 623 | "Loop latch should be exiting after loop-rotate."); |
| 624 | |
| 625 | // Restore the loop metadata. |
| 626 | // NB! We presume LoopRotation DOESN'T ADD its own metadata. |
| 627 | if ((MadeChange || SimplifiedLatch) && LoopMD) |
| 628 | L->setLoopID(LoopMD); |
| 629 | |
| 630 | return MadeChange || SimplifiedLatch; |
| 631 | } |
| 632 | |
| 633 | |
| 634 | /// The utility to convert a loop into a loop with bottom test. |
Jin Lin | 585f269 | 2018-04-19 20:29:43 +0000 | [diff] [blame] | 635 | bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, |
| 636 | AssumptionCache *AC, DominatorTree *DT, |
| 637 | ScalarEvolution *SE, const SimplifyQuery &SQ, |
| 638 | bool RotationOnly = true, |
| 639 | unsigned Threshold = unsigned(-1), |
| 640 | bool IsUtilMode = true) { |
| 641 | LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode); |
David Green | b0aa36f | 2018-03-29 08:48:15 +0000 | [diff] [blame] | 642 | |
| 643 | return LR.processLoop(L); |
| 644 | } |