//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the LLVM vectorization plan. It represents a candidate for
/// vectorization, making it possible to plan and optimize how a given loop is
/// vectorized before generating LLVM-IR.
/// The vectorizer uses vectorization plans to estimate the costs of potential
/// candidates and, if profitable, to execute the desired plan, generating
/// vector LLVM-IR code.
///
//===----------------------------------------------------------------------===//
19
20#include "VPlan.h"
21#include "llvm/ADT/PostOrderIterator.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/Support/GraphWriter.h"
26#include "llvm/Transforms/Utils/BasicBlockUtils.h"
27
28using namespace llvm;
29
30#define DEBUG_TYPE "vplan"
31
32/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
33const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
34 const VPBlockBase *Block = this;
35 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
36 Block = Region->getEntry();
37 return cast<VPBasicBlock>(Block);
38}
39
40VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
41 VPBlockBase *Block = this;
42 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
43 Block = Region->getEntry();
44 return cast<VPBasicBlock>(Block);
45}
46
47/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
48const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
49 const VPBlockBase *Block = this;
50 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
51 Block = Region->getExit();
52 return cast<VPBasicBlock>(Block);
53}
54
55VPBasicBlock *VPBlockBase::getExitBasicBlock() {
56 VPBlockBase *Block = this;
57 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
58 Block = Region->getExit();
59 return cast<VPBasicBlock>(Block);
60}
61
62VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
63 if (!Successors.empty() || !Parent)
64 return this;
65 assert(Parent->getExit() == this &&
66 "Block w/o successors not the exit of its parent.");
67 return Parent->getEnclosingBlockWithSuccessors();
68}
69
70VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
71 if (!Predecessors.empty() || !Parent)
72 return this;
73 assert(Parent->getEntry() == this &&
74 "Block w/o predecessors not the entry of its parent.");
75 return Parent->getEnclosingBlockWithPredecessors();
76}
77
78void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
79 SmallVector<VPBlockBase *, 8> Blocks;
80 for (VPBlockBase *Block : depth_first(Entry))
81 Blocks.push_back(Block);
82
83 for (VPBlockBase *Block : Blocks)
84 delete Block;
85}
86
87BasicBlock *
88VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
89 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
90 // Pred stands for Predessor. Prev stands for Previous - last visited/created.
91 BasicBlock *PrevBB = CFG.PrevBB;
92 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
93 PrevBB->getParent(), CFG.LastBB);
94 DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
95
96 // Hook up the new basic block to its predecessors.
97 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
98 VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
99 auto &PredVPSuccessors = PredVPBB->getSuccessors();
100 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
101 assert(PredBB && "Predecessor basic-block not found building successor.");
102 auto *PredBBTerminator = PredBB->getTerminator();
103 DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
104 if (isa<UnreachableInst>(PredBBTerminator)) {
105 assert(PredVPSuccessors.size() == 1 &&
106 "Predecessor ending w/o branch must have single successor.");
107 PredBBTerminator->eraseFromParent();
108 BranchInst::Create(NewBB, PredBB);
109 } else {
110 assert(PredVPSuccessors.size() == 2 &&
111 "Predecessor ending with branch must have two successors.");
112 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
113 assert(!PredBBTerminator->getSuccessor(idx) &&
114 "Trying to reset an existing successor block.");
115 PredBBTerminator->setSuccessor(idx, NewBB);
116 }
117 }
118 return NewBB;
119}
120
121void VPBasicBlock::execute(VPTransformState *State) {
122 bool Replica = State->Instance &&
123 !(State->Instance->Part == 0 && State->Instance->Lane == 0);
124 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
125 VPBlockBase *SingleHPred = nullptr;
126 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
127
128 // 1. Create an IR basic block, or reuse the last one if possible.
129 // The last IR basic block is reused, as an optimization, in three cases:
130 // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
131 // B. when the current VPBB has a single (hierarchical) predecessor which
132 // is PrevVPBB and the latter has a single (hierarchical) successor; and
133 // C. when the current VPBB is an entry of a region replica - where PrevVPBB
134 // is the exit of this region from a previous instance, or the predecessor
135 // of this region.
136 if (PrevVPBB && /* A */
137 !((SingleHPred = getSingleHierarchicalPredecessor()) &&
138 SingleHPred->getExitBasicBlock() == PrevVPBB &&
139 PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
140 !(Replica && getPredecessors().empty())) { /* C */
141
142 NewBB = createEmptyBasicBlock(State->CFG);
143 State->Builder.SetInsertPoint(NewBB);
144 // Temporarily terminate with unreachable until CFG is rewired.
145 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
146 State->Builder.SetInsertPoint(Terminator);
147 // Register NewBB in its loop. In innermost loops its the same for all BB's.
148 Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
149 L->addBasicBlockToLoop(NewBB, *State->LI);
150 State->CFG.PrevBB = NewBB;
151 }
152
153 // 2. Fill the IR basic block with IR instructions.
154 DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
155 << " in BB:" << NewBB->getName() << '\n');
156
157 State->CFG.VPBB2IRBB[this] = NewBB;
158 State->CFG.PrevVPBB = this;
159
160 for (VPRecipeBase &Recipe : Recipes)
161 Recipe.execute(*State);
162
163 DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
164}
165
166void VPRegionBlock::execute(VPTransformState *State) {
167 ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
168
169 if (!isReplicator()) {
170 // Visit the VPBlocks connected to "this", starting from it.
171 for (VPBlockBase *Block : RPOT) {
172 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
173 Block->execute(State);
174 }
175 return;
176 }
177
178 assert(!State->Instance && "Replicating a Region with non-null instance.");
179
180 // Enter replicating mode.
181 State->Instance = {0, 0};
182
183 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
184 State->Instance->Part = Part;
185 for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) {
186 State->Instance->Lane = Lane;
187 // Visit the VPBlocks connected to \p this, starting from it.
188 for (VPBlockBase *Block : RPOT) {
189 DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
190 Block->execute(State);
191 }
192 }
193 }
194
195 // Exit replicating mode.
196 State->Instance.reset();
197}
198
199/// Generate the code inside the body of the vectorized loop. Assumes a single
200/// LoopVectorBody basic-block was created for this. Introduce additional
201/// basic-blocks as needed, and fill them all.
202void VPlan::execute(VPTransformState *State) {
203 BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
204 BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
205 assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
206 BasicBlock *VectorLatchBB = VectorHeaderBB;
207
208 // 1. Make room to generate basic-blocks inside loop body if needed.
209 VectorLatchBB = VectorHeaderBB->splitBasicBlock(
210 VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
211 Loop *L = State->LI->getLoopFor(VectorHeaderBB);
212 L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
213 // Remove the edge between Header and Latch to allow other connections.
214 // Temporarily terminate with unreachable until CFG is rewired.
215 // Note: this asserts the generated code's assumption that
216 // getFirstInsertionPt() can be dereferenced into an Instruction.
217 VectorHeaderBB->getTerminator()->eraseFromParent();
218 State->Builder.SetInsertPoint(VectorHeaderBB);
219 UnreachableInst *Terminator = State->Builder.CreateUnreachable();
220 State->Builder.SetInsertPoint(Terminator);
221
222 // 2. Generate code in loop body.
223 State->CFG.PrevVPBB = nullptr;
224 State->CFG.PrevBB = VectorHeaderBB;
225 State->CFG.LastBB = VectorLatchBB;
226
227 for (VPBlockBase *Block : depth_first(Entry))
228 Block->execute(State);
229
230 // 3. Merge the temporary latch created with the last basic-block filled.
231 BasicBlock *LastBB = State->CFG.PrevBB;
232 // Connect LastBB to VectorLatchBB to facilitate their merge.
233 assert(isa<UnreachableInst>(LastBB->getTerminator()) &&
234 "Expected VPlan CFG to terminate with unreachable");
235 LastBB->getTerminator()->eraseFromParent();
236 BranchInst::Create(VectorLatchBB, LastBB);
237
238 // Merge LastBB with Latch.
239 bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
240 (void)Merged;
241 assert(Merged && "Could not merge last basic block with latch.");
242 VectorLatchBB = LastBB;
243
244 updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB);
245}
246
247void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
248 BasicBlock *LoopLatchBB) {
249 BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
250 assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
251 DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB);
252 // The vector body may be more than a single basic-block by this point.
253 // Update the dominator tree information inside the vector body by propagating
254 // it from header to latch, expecting only triangular control-flow, if any.
255 BasicBlock *PostDomSucc = nullptr;
256 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
257 // Get the list of successors of this block.
258 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
259 assert(Succs.size() <= 2 &&
260 "Basic block in vector loop has more than 2 successors.");
261 PostDomSucc = Succs[0];
262 if (Succs.size() == 1) {
263 assert(PostDomSucc->getSinglePredecessor() &&
264 "PostDom successor has more than one predecessor.");
265 DT->addNewBlock(PostDomSucc, BB);
266 continue;
267 }
268 BasicBlock *InterimSucc = Succs[1];
269 if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
270 PostDomSucc = Succs[1];
271 InterimSucc = Succs[0];
272 }
273 assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
274 "One successor of a basic block does not lead to the other.");
275 assert(InterimSucc->getSinglePredecessor() &&
276 "Interim successor has more than one predecessor.");
277 assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 &&
278 "PostDom successor has more than two predecessors.");
279 DT->addNewBlock(InterimSucc, BB);
280 DT->addNewBlock(PostDomSucc, BB);
281 }
282}
283
284const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
285 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
286 Twine(getOrCreateBID(Block));
287}
288
289const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
290 const std::string &Name = Block->getName();
291 if (!Name.empty())
292 return Name;
293 return "VPB" + Twine(getOrCreateBID(Block));
294}
295
296void VPlanPrinter::dump() {
297 Depth = 1;
298 bumpIndent(0);
299 OS << "digraph VPlan {\n";
300 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
301 if (!Plan.getName().empty())
302 OS << "\\n" << DOT::EscapeString(Plan.getName());
303 OS << "\"]\n";
304 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
305 OS << "edge [fontname=Courier, fontsize=30]\n";
306 OS << "compound=true\n";
307
308 for (VPBlockBase *Block : depth_first(Plan.getEntry()))
309 dumpBlock(Block);
310
311 OS << "}\n";
312}
313
314void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
315 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
316 dumpBasicBlock(BasicBlock);
317 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
318 dumpRegion(Region);
319 else
320 llvm_unreachable("Unsupported kind of VPBlock.");
321}
322
323void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
324 bool Hidden, const Twine &Label) {
325 // Due to "dot" we print an edge between two regions as an edge between the
326 // exit basic block and the entry basic of the respective regions.
327 const VPBlockBase *Tail = From->getExitBasicBlock();
328 const VPBlockBase *Head = To->getEntryBasicBlock();
329 OS << Indent << getUID(Tail) << " -> " << getUID(Head);
330 OS << " [ label=\"" << Label << '\"';
331 if (Tail != From)
332 OS << " ltail=" << getUID(From);
333 if (Head != To)
334 OS << " lhead=" << getUID(To);
335 if (Hidden)
336 OS << "; splines=none";
337 OS << "]\n";
338}
339
340void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
341 auto &Successors = Block->getSuccessors();
342 if (Successors.size() == 1)
343 drawEdge(Block, Successors.front(), false, "");
344 else if (Successors.size() == 2) {
345 drawEdge(Block, Successors.front(), false, "T");
346 drawEdge(Block, Successors.back(), false, "F");
347 } else {
348 unsigned SuccessorNumber = 0;
349 for (auto *Successor : Successors)
350 drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
351 }
352}
353
354void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
355 OS << Indent << getUID(BasicBlock) << " [label =\n";
356 bumpIndent(1);
357 OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\"";
358 bumpIndent(1);
359 for (const VPRecipeBase &Recipe : *BasicBlock)
360 Recipe.print(OS, Indent);
361 bumpIndent(-2);
362 OS << "\n" << Indent << "]\n";
363 dumpEdges(BasicBlock);
364}
365
366void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
367 OS << Indent << "subgraph " << getUID(Region) << " {\n";
368 bumpIndent(1);
369 OS << Indent << "fontname=Courier\n"
370 << Indent << "label=\""
371 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
372 << DOT::EscapeString(Region->getName()) << "\"\n";
373 // Dump the blocks of the region.
374 assert(Region->getEntry() && "Region contains no inner blocks.");
375 for (const VPBlockBase *Block : depth_first(Region->getEntry()))
376 dumpBlock(Block);
377 bumpIndent(-1);
378 OS << Indent << "}\n";
379 dumpEdges(Region);
380}
381
382void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) {
383 std::string IngredientString;
384 raw_string_ostream RSO(IngredientString);
385 if (auto *Inst = dyn_cast<Instruction>(V)) {
386 if (!Inst->getType()->isVoidTy()) {
387 Inst->printAsOperand(RSO, false);
388 RSO << " = ";
389 }
390 RSO << Inst->getOpcodeName() << " ";
391 unsigned E = Inst->getNumOperands();
392 if (E > 0) {
393 Inst->getOperand(0)->printAsOperand(RSO, false);
394 for (unsigned I = 1; I < E; ++I)
395 Inst->getOperand(I)->printAsOperand(RSO << ", ", false);
396 }
397 } else // !Inst
398 V->printAsOperand(RSO, false);
399 RSO.flush();
400 O << DOT::EscapeString(IngredientString);
401}