Ayal Zaks | 1f58dda | 2017-08-27 12:55:46 +0000 | [diff] [blame^] | 1 | //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | /// |
| 10 | /// \file |
| 11 | /// This is the LLVM vectorization plan. It represents a candidate for |
| 12 | /// vectorization, allowing to plan and optimize how to vectorize a given loop |
| 13 | /// before generating LLVM-IR. |
| 14 | /// The vectorizer uses vectorization plans to estimate the costs of potential |
| 15 | /// candidates and if profitable to execute the desired plan, generating vector |
| 16 | /// LLVM-IR code. |
| 17 | /// |
| 18 | //===----------------------------------------------------------------------===// |
| 19 | |
| 20 | #include "VPlan.h" |
| 21 | #include "llvm/ADT/PostOrderIterator.h" |
| 22 | #include "llvm/Analysis/LoopInfo.h" |
| 23 | #include "llvm/IR/BasicBlock.h" |
| 24 | #include "llvm/IR/Dominators.h" |
| 25 | #include "llvm/Support/GraphWriter.h" |
| 26 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| 27 | |
| 28 | using namespace llvm; |
| 29 | |
| 30 | #define DEBUG_TYPE "vplan" |
| 31 | |
| 32 | /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. |
| 33 | const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { |
| 34 | const VPBlockBase *Block = this; |
| 35 | while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| 36 | Block = Region->getEntry(); |
| 37 | return cast<VPBasicBlock>(Block); |
| 38 | } |
| 39 | |
| 40 | VPBasicBlock *VPBlockBase::getEntryBasicBlock() { |
| 41 | VPBlockBase *Block = this; |
| 42 | while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| 43 | Block = Region->getEntry(); |
| 44 | return cast<VPBasicBlock>(Block); |
| 45 | } |
| 46 | |
| 47 | /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. |
| 48 | const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { |
| 49 | const VPBlockBase *Block = this; |
| 50 | while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| 51 | Block = Region->getExit(); |
| 52 | return cast<VPBasicBlock>(Block); |
| 53 | } |
| 54 | |
| 55 | VPBasicBlock *VPBlockBase::getExitBasicBlock() { |
| 56 | VPBlockBase *Block = this; |
| 57 | while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| 58 | Block = Region->getExit(); |
| 59 | return cast<VPBasicBlock>(Block); |
| 60 | } |
| 61 | |
| 62 | VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { |
| 63 | if (!Successors.empty() || !Parent) |
| 64 | return this; |
| 65 | assert(Parent->getExit() == this && |
| 66 | "Block w/o successors not the exit of its parent."); |
| 67 | return Parent->getEnclosingBlockWithSuccessors(); |
| 68 | } |
| 69 | |
| 70 | VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { |
| 71 | if (!Predecessors.empty() || !Parent) |
| 72 | return this; |
| 73 | assert(Parent->getEntry() == this && |
| 74 | "Block w/o predecessors not the entry of its parent."); |
| 75 | return Parent->getEnclosingBlockWithPredecessors(); |
| 76 | } |
| 77 | |
| 78 | void VPBlockBase::deleteCFG(VPBlockBase *Entry) { |
| 79 | SmallVector<VPBlockBase *, 8> Blocks; |
| 80 | for (VPBlockBase *Block : depth_first(Entry)) |
| 81 | Blocks.push_back(Block); |
| 82 | |
| 83 | for (VPBlockBase *Block : Blocks) |
| 84 | delete Block; |
| 85 | } |
| 86 | |
| 87 | BasicBlock * |
| 88 | VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { |
| 89 | // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. |
| 90 | // Pred stands for Predessor. Prev stands for Previous - last visited/created. |
| 91 | BasicBlock *PrevBB = CFG.PrevBB; |
| 92 | BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), |
| 93 | PrevBB->getParent(), CFG.LastBB); |
| 94 | DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); |
| 95 | |
| 96 | // Hook up the new basic block to its predecessors. |
| 97 | for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { |
| 98 | VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); |
| 99 | auto &PredVPSuccessors = PredVPBB->getSuccessors(); |
| 100 | BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; |
| 101 | assert(PredBB && "Predecessor basic-block not found building successor."); |
| 102 | auto *PredBBTerminator = PredBB->getTerminator(); |
| 103 | DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); |
| 104 | if (isa<UnreachableInst>(PredBBTerminator)) { |
| 105 | assert(PredVPSuccessors.size() == 1 && |
| 106 | "Predecessor ending w/o branch must have single successor."); |
| 107 | PredBBTerminator->eraseFromParent(); |
| 108 | BranchInst::Create(NewBB, PredBB); |
| 109 | } else { |
| 110 | assert(PredVPSuccessors.size() == 2 && |
| 111 | "Predecessor ending with branch must have two successors."); |
| 112 | unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; |
| 113 | assert(!PredBBTerminator->getSuccessor(idx) && |
| 114 | "Trying to reset an existing successor block."); |
| 115 | PredBBTerminator->setSuccessor(idx, NewBB); |
| 116 | } |
| 117 | } |
| 118 | return NewBB; |
| 119 | } |
| 120 | |
| 121 | void VPBasicBlock::execute(VPTransformState *State) { |
| 122 | bool Replica = State->Instance && |
| 123 | !(State->Instance->Part == 0 && State->Instance->Lane == 0); |
| 124 | VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; |
| 125 | VPBlockBase *SingleHPred = nullptr; |
| 126 | BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. |
| 127 | |
| 128 | // 1. Create an IR basic block, or reuse the last one if possible. |
| 129 | // The last IR basic block is reused, as an optimization, in three cases: |
| 130 | // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; |
| 131 | // B. when the current VPBB has a single (hierarchical) predecessor which |
| 132 | // is PrevVPBB and the latter has a single (hierarchical) successor; and |
| 133 | // C. when the current VPBB is an entry of a region replica - where PrevVPBB |
| 134 | // is the exit of this region from a previous instance, or the predecessor |
| 135 | // of this region. |
| 136 | if (PrevVPBB && /* A */ |
| 137 | !((SingleHPred = getSingleHierarchicalPredecessor()) && |
| 138 | SingleHPred->getExitBasicBlock() == PrevVPBB && |
| 139 | PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ |
| 140 | !(Replica && getPredecessors().empty())) { /* C */ |
| 141 | |
| 142 | NewBB = createEmptyBasicBlock(State->CFG); |
| 143 | State->Builder.SetInsertPoint(NewBB); |
| 144 | // Temporarily terminate with unreachable until CFG is rewired. |
| 145 | UnreachableInst *Terminator = State->Builder.CreateUnreachable(); |
| 146 | State->Builder.SetInsertPoint(Terminator); |
| 147 | // Register NewBB in its loop. In innermost loops its the same for all BB's. |
| 148 | Loop *L = State->LI->getLoopFor(State->CFG.LastBB); |
| 149 | L->addBasicBlockToLoop(NewBB, *State->LI); |
| 150 | State->CFG.PrevBB = NewBB; |
| 151 | } |
| 152 | |
| 153 | // 2. Fill the IR basic block with IR instructions. |
| 154 | DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() |
| 155 | << " in BB:" << NewBB->getName() << '\n'); |
| 156 | |
| 157 | State->CFG.VPBB2IRBB[this] = NewBB; |
| 158 | State->CFG.PrevVPBB = this; |
| 159 | |
| 160 | for (VPRecipeBase &Recipe : Recipes) |
| 161 | Recipe.execute(*State); |
| 162 | |
| 163 | DEBUG(dbgs() << "LV: filled BB:" << *NewBB); |
| 164 | } |
| 165 | |
| 166 | void VPRegionBlock::execute(VPTransformState *State) { |
| 167 | ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); |
| 168 | |
| 169 | if (!isReplicator()) { |
| 170 | // Visit the VPBlocks connected to "this", starting from it. |
| 171 | for (VPBlockBase *Block : RPOT) { |
| 172 | DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); |
| 173 | Block->execute(State); |
| 174 | } |
| 175 | return; |
| 176 | } |
| 177 | |
| 178 | assert(!State->Instance && "Replicating a Region with non-null instance."); |
| 179 | |
| 180 | // Enter replicating mode. |
| 181 | State->Instance = {0, 0}; |
| 182 | |
| 183 | for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { |
| 184 | State->Instance->Part = Part; |
| 185 | for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) { |
| 186 | State->Instance->Lane = Lane; |
| 187 | // Visit the VPBlocks connected to \p this, starting from it. |
| 188 | for (VPBlockBase *Block : RPOT) { |
| 189 | DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); |
| 190 | Block->execute(State); |
| 191 | } |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | // Exit replicating mode. |
| 196 | State->Instance.reset(); |
| 197 | } |
| 198 | |
| 199 | /// Generate the code inside the body of the vectorized loop. Assumes a single |
| 200 | /// LoopVectorBody basic-block was created for this. Introduce additional |
| 201 | /// basic-blocks as needed, and fill them all. |
| 202 | void VPlan::execute(VPTransformState *State) { |
| 203 | BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; |
| 204 | BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); |
| 205 | assert(VectorHeaderBB && "Loop preheader does not have a single successor."); |
| 206 | BasicBlock *VectorLatchBB = VectorHeaderBB; |
| 207 | |
| 208 | // 1. Make room to generate basic-blocks inside loop body if needed. |
| 209 | VectorLatchBB = VectorHeaderBB->splitBasicBlock( |
| 210 | VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); |
| 211 | Loop *L = State->LI->getLoopFor(VectorHeaderBB); |
| 212 | L->addBasicBlockToLoop(VectorLatchBB, *State->LI); |
| 213 | // Remove the edge between Header and Latch to allow other connections. |
| 214 | // Temporarily terminate with unreachable until CFG is rewired. |
| 215 | // Note: this asserts the generated code's assumption that |
| 216 | // getFirstInsertionPt() can be dereferenced into an Instruction. |
| 217 | VectorHeaderBB->getTerminator()->eraseFromParent(); |
| 218 | State->Builder.SetInsertPoint(VectorHeaderBB); |
| 219 | UnreachableInst *Terminator = State->Builder.CreateUnreachable(); |
| 220 | State->Builder.SetInsertPoint(Terminator); |
| 221 | |
| 222 | // 2. Generate code in loop body. |
| 223 | State->CFG.PrevVPBB = nullptr; |
| 224 | State->CFG.PrevBB = VectorHeaderBB; |
| 225 | State->CFG.LastBB = VectorLatchBB; |
| 226 | |
| 227 | for (VPBlockBase *Block : depth_first(Entry)) |
| 228 | Block->execute(State); |
| 229 | |
| 230 | // 3. Merge the temporary latch created with the last basic-block filled. |
| 231 | BasicBlock *LastBB = State->CFG.PrevBB; |
| 232 | // Connect LastBB to VectorLatchBB to facilitate their merge. |
| 233 | assert(isa<UnreachableInst>(LastBB->getTerminator()) && |
| 234 | "Expected VPlan CFG to terminate with unreachable"); |
| 235 | LastBB->getTerminator()->eraseFromParent(); |
| 236 | BranchInst::Create(VectorLatchBB, LastBB); |
| 237 | |
| 238 | // Merge LastBB with Latch. |
| 239 | bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); |
| 240 | (void)Merged; |
| 241 | assert(Merged && "Could not merge last basic block with latch."); |
| 242 | VectorLatchBB = LastBB; |
| 243 | |
| 244 | updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); |
| 245 | } |
| 246 | |
| 247 | void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, |
| 248 | BasicBlock *LoopLatchBB) { |
| 249 | BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); |
| 250 | assert(LoopHeaderBB && "Loop preheader does not have a single successor."); |
| 251 | DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); |
| 252 | // The vector body may be more than a single basic-block by this point. |
| 253 | // Update the dominator tree information inside the vector body by propagating |
| 254 | // it from header to latch, expecting only triangular control-flow, if any. |
| 255 | BasicBlock *PostDomSucc = nullptr; |
| 256 | for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { |
| 257 | // Get the list of successors of this block. |
| 258 | std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); |
| 259 | assert(Succs.size() <= 2 && |
| 260 | "Basic block in vector loop has more than 2 successors."); |
| 261 | PostDomSucc = Succs[0]; |
| 262 | if (Succs.size() == 1) { |
| 263 | assert(PostDomSucc->getSinglePredecessor() && |
| 264 | "PostDom successor has more than one predecessor."); |
| 265 | DT->addNewBlock(PostDomSucc, BB); |
| 266 | continue; |
| 267 | } |
| 268 | BasicBlock *InterimSucc = Succs[1]; |
| 269 | if (PostDomSucc->getSingleSuccessor() == InterimSucc) { |
| 270 | PostDomSucc = Succs[1]; |
| 271 | InterimSucc = Succs[0]; |
| 272 | } |
| 273 | assert(InterimSucc->getSingleSuccessor() == PostDomSucc && |
| 274 | "One successor of a basic block does not lead to the other."); |
| 275 | assert(InterimSucc->getSinglePredecessor() && |
| 276 | "Interim successor has more than one predecessor."); |
| 277 | assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 && |
| 278 | "PostDom successor has more than two predecessors."); |
| 279 | DT->addNewBlock(InterimSucc, BB); |
| 280 | DT->addNewBlock(PostDomSucc, BB); |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { |
| 285 | return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + |
| 286 | Twine(getOrCreateBID(Block)); |
| 287 | } |
| 288 | |
| 289 | const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { |
| 290 | const std::string &Name = Block->getName(); |
| 291 | if (!Name.empty()) |
| 292 | return Name; |
| 293 | return "VPB" + Twine(getOrCreateBID(Block)); |
| 294 | } |
| 295 | |
| 296 | void VPlanPrinter::dump() { |
| 297 | Depth = 1; |
| 298 | bumpIndent(0); |
| 299 | OS << "digraph VPlan {\n"; |
| 300 | OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; |
| 301 | if (!Plan.getName().empty()) |
| 302 | OS << "\\n" << DOT::EscapeString(Plan.getName()); |
| 303 | OS << "\"]\n"; |
| 304 | OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; |
| 305 | OS << "edge [fontname=Courier, fontsize=30]\n"; |
| 306 | OS << "compound=true\n"; |
| 307 | |
| 308 | for (VPBlockBase *Block : depth_first(Plan.getEntry())) |
| 309 | dumpBlock(Block); |
| 310 | |
| 311 | OS << "}\n"; |
| 312 | } |
| 313 | |
| 314 | void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { |
| 315 | if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) |
| 316 | dumpBasicBlock(BasicBlock); |
| 317 | else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| 318 | dumpRegion(Region); |
| 319 | else |
| 320 | llvm_unreachable("Unsupported kind of VPBlock."); |
| 321 | } |
| 322 | |
| 323 | void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, |
| 324 | bool Hidden, const Twine &Label) { |
| 325 | // Due to "dot" we print an edge between two regions as an edge between the |
| 326 | // exit basic block and the entry basic of the respective regions. |
| 327 | const VPBlockBase *Tail = From->getExitBasicBlock(); |
| 328 | const VPBlockBase *Head = To->getEntryBasicBlock(); |
| 329 | OS << Indent << getUID(Tail) << " -> " << getUID(Head); |
| 330 | OS << " [ label=\"" << Label << '\"'; |
| 331 | if (Tail != From) |
| 332 | OS << " ltail=" << getUID(From); |
| 333 | if (Head != To) |
| 334 | OS << " lhead=" << getUID(To); |
| 335 | if (Hidden) |
| 336 | OS << "; splines=none"; |
| 337 | OS << "]\n"; |
| 338 | } |
| 339 | |
| 340 | void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { |
| 341 | auto &Successors = Block->getSuccessors(); |
| 342 | if (Successors.size() == 1) |
| 343 | drawEdge(Block, Successors.front(), false, ""); |
| 344 | else if (Successors.size() == 2) { |
| 345 | drawEdge(Block, Successors.front(), false, "T"); |
| 346 | drawEdge(Block, Successors.back(), false, "F"); |
| 347 | } else { |
| 348 | unsigned SuccessorNumber = 0; |
| 349 | for (auto *Successor : Successors) |
| 350 | drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { |
| 355 | OS << Indent << getUID(BasicBlock) << " [label =\n"; |
| 356 | bumpIndent(1); |
| 357 | OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\""; |
| 358 | bumpIndent(1); |
| 359 | for (const VPRecipeBase &Recipe : *BasicBlock) |
| 360 | Recipe.print(OS, Indent); |
| 361 | bumpIndent(-2); |
| 362 | OS << "\n" << Indent << "]\n"; |
| 363 | dumpEdges(BasicBlock); |
| 364 | } |
| 365 | |
| 366 | void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { |
| 367 | OS << Indent << "subgraph " << getUID(Region) << " {\n"; |
| 368 | bumpIndent(1); |
| 369 | OS << Indent << "fontname=Courier\n" |
| 370 | << Indent << "label=\"" |
| 371 | << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") |
| 372 | << DOT::EscapeString(Region->getName()) << "\"\n"; |
| 373 | // Dump the blocks of the region. |
| 374 | assert(Region->getEntry() && "Region contains no inner blocks."); |
| 375 | for (const VPBlockBase *Block : depth_first(Region->getEntry())) |
| 376 | dumpBlock(Block); |
| 377 | bumpIndent(-1); |
| 378 | OS << Indent << "}\n"; |
| 379 | dumpEdges(Region); |
| 380 | } |
| 381 | |
| 382 | void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { |
| 383 | std::string IngredientString; |
| 384 | raw_string_ostream RSO(IngredientString); |
| 385 | if (auto *Inst = dyn_cast<Instruction>(V)) { |
| 386 | if (!Inst->getType()->isVoidTy()) { |
| 387 | Inst->printAsOperand(RSO, false); |
| 388 | RSO << " = "; |
| 389 | } |
| 390 | RSO << Inst->getOpcodeName() << " "; |
| 391 | unsigned E = Inst->getNumOperands(); |
| 392 | if (E > 0) { |
| 393 | Inst->getOperand(0)->printAsOperand(RSO, false); |
| 394 | for (unsigned I = 1; I < E; ++I) |
| 395 | Inst->getOperand(I)->printAsOperand(RSO << ", ", false); |
| 396 | } |
| 397 | } else // !Inst |
| 398 | V->printAsOperand(RSO, false); |
| 399 | RSO.flush(); |
| 400 | O << DOT::EscapeString(IngredientString); |
| 401 | } |