| Ayal Zaks | 1f58dda | 2017-08-27 12:55:46 +0000 | [diff] [blame] | 1 | //===- VPlan.h - Represent A Vectorizer Plan ------------------------------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | /// | 
|  | 10 | /// \file | 
|  | 11 | /// This file contains the declarations of the Vectorization Plan base classes: | 
|  | 12 | /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual | 
|  | 13 | ///    VPBlockBase, together implementing a Hierarchical CFG; | 
|  | 14 | /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be | 
|  | 15 | ///    treated as proper graphs for generic algorithms; | 
|  | 16 | /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained | 
|  | 17 | ///    within VPBasicBlocks; | 
|  | 18 | /// 4. The VPlan class holding a candidate for vectorization; | 
|  | 19 | /// 5. The VPlanPrinter class providing a way to print a plan in dot format. | 
|  | 20 | /// These are documented in docs/VectorizationPlan.rst. | 
|  | 21 | /// | 
|  | 22 | //===----------------------------------------------------------------------===// | 
|  | 23 |  | 
|  | 24 | #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H | 
|  | 25 | #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H | 
|  | 26 |  | 
|  | 27 | #include "llvm/ADT/GraphTraits.h" | 
|  | 28 | #include "llvm/ADT/SmallSet.h" | 
|  | 29 | #include "llvm/ADT/ilist.h" | 
|  | 30 | #include "llvm/ADT/ilist_node.h" | 
|  | 31 | #include "llvm/IR/IRBuilder.h" | 
|  | 32 | #include "llvm/Support/raw_ostream.h" | 
|  | 33 |  | 
|  | 34 | // The (re)use of existing LoopVectorize classes is subject to future VPlan | 
|  | 35 | // refactoring. | 
|  | 36 | namespace { | 
|  | 37 | // Forward declarations. | 
|  | 38 | //class InnerLoopVectorizer; | 
|  | 39 | class LoopVectorizationLegality; | 
|  | 40 | class LoopVectorizationCostModel; | 
|  | 41 | } // namespace | 
|  | 42 |  | 
|  | 43 | namespace llvm { | 
|  | 44 |  | 
|  | 45 | // Forward declarations. | 
|  | 46 | class BasicBlock; | 
|  | 47 | class InnerLoopVectorizer; | 
|  | 48 | class VPBasicBlock; | 
|  | 49 |  | 
|  | 50 | /// In what follows, the term "input IR" refers to code that is fed into the | 
|  | 51 | /// vectorizer whereas the term "output IR" refers to code that is generated by | 
|  | 52 | /// the vectorizer. | 
|  | 53 |  | 
|  | 54 | /// VPIteration represents a single point in the iteration space of the output | 
|  | 55 | /// (vectorized and/or unrolled) IR loop. | 
|  | 56 | struct VPIteration { | 
|  | 57 | unsigned Part; ///< in [0..UF) | 
|  | 58 | unsigned Lane; ///< in [0..VF) | 
|  | 59 | }; | 
|  | 60 |  | 
|  | 61 | /// This is a helper struct for maintaining vectorization state. It's used for | 
|  | 62 | /// mapping values from the original loop to their corresponding values in | 
|  | 63 | /// the new loop. Two mappings are maintained: one for vectorized values and | 
|  | 64 | /// one for scalarized values. Vectorized values are represented with UF | 
|  | 65 | /// vector values in the new loop, and scalarized values are represented with | 
|  | 66 | /// UF x VF scalar values in the new loop. UF and VF are the unroll and | 
|  | 67 | /// vectorization factors, respectively. | 
|  | 68 | /// | 
|  | 69 | /// Entries can be added to either map with setVectorValue and setScalarValue, | 
|  | 70 | /// which assert that an entry was not already added before. If an entry is to | 
|  | 71 | /// replace an existing one, call resetVectorValue and resetScalarValue. This is | 
|  | 72 | /// currently needed to modify the mapped values during "fix-up" operations that | 
|  | 73 | /// occur once the first phase of widening is complete. These operations include | 
|  | 74 | /// type truncation and the second phase of recurrence widening. | 
|  | 75 | /// | 
|  | 76 | /// Entries from either map can be retrieved using the getVectorValue and | 
|  | 77 | /// getScalarValue functions, which assert that the desired value exists. | 
|  | 78 |  | 
|  | 79 | struct VectorizerValueMap { | 
|  | 80 | private: | 
|  | 81 | /// The unroll factor. Each entry in the vector map contains UF vector values. | 
|  | 82 | unsigned UF; | 
|  | 83 |  | 
|  | 84 | /// The vectorization factor. Each entry in the scalar map contains UF x VF | 
|  | 85 | /// scalar values. | 
|  | 86 | unsigned VF; | 
|  | 87 |  | 
|  | 88 | /// The vector and scalar map storage. We use std::map and not DenseMap | 
|  | 89 | /// because insertions to DenseMap invalidate its iterators. | 
|  | 90 | typedef SmallVector<Value *, 2> VectorParts; | 
|  | 91 | typedef SmallVector<SmallVector<Value *, 4>, 2> ScalarParts; | 
|  | 92 | std::map<Value *, VectorParts> VectorMapStorage; | 
|  | 93 | std::map<Value *, ScalarParts> ScalarMapStorage; | 
|  | 94 |  | 
|  | 95 | public: | 
|  | 96 | /// Construct an empty map with the given unroll and vectorization factors. | 
|  | 97 | VectorizerValueMap(unsigned UF, unsigned VF) : UF(UF), VF(VF) {} | 
|  | 98 |  | 
|  | 99 | /// \return True if the map has any vector entry for \p Key. | 
|  | 100 | bool hasAnyVectorValue(Value *Key) const { | 
|  | 101 | return VectorMapStorage.count(Key); | 
|  | 102 | } | 
|  | 103 |  | 
|  | 104 | /// \return True if the map has a vector entry for \p Key and \p Part. | 
|  | 105 | bool hasVectorValue(Value *Key, unsigned Part) const { | 
|  | 106 | assert(Part < UF && "Queried Vector Part is too large."); | 
|  | 107 | if (!hasAnyVectorValue(Key)) | 
|  | 108 | return false; | 
|  | 109 | const VectorParts &Entry = VectorMapStorage.find(Key)->second; | 
|  | 110 | assert(Entry.size() == UF && "VectorParts has wrong dimensions."); | 
|  | 111 | return Entry[Part] != nullptr; | 
|  | 112 | } | 
|  | 113 |  | 
|  | 114 | /// \return True if the map has any scalar entry for \p Key. | 
|  | 115 | bool hasAnyScalarValue(Value *Key) const { | 
|  | 116 | return ScalarMapStorage.count(Key); | 
|  | 117 | } | 
|  | 118 |  | 
|  | 119 | /// \return True if the map has a scalar entry for \p Key and \p Instance. | 
|  | 120 | bool hasScalarValue(Value *Key, const VPIteration &Instance) const { | 
|  | 121 | assert(Instance.Part < UF && "Queried Scalar Part is too large."); | 
|  | 122 | assert(Instance.Lane < VF && "Queried Scalar Lane is too large."); | 
|  | 123 | if (!hasAnyScalarValue(Key)) | 
|  | 124 | return false; | 
|  | 125 | const ScalarParts &Entry = ScalarMapStorage.find(Key)->second; | 
|  | 126 | assert(Entry.size() == UF && "ScalarParts has wrong dimensions."); | 
|  | 127 | assert(Entry[Instance.Part].size() == VF && | 
|  | 128 | "ScalarParts has wrong dimensions."); | 
|  | 129 | return Entry[Instance.Part][Instance.Lane] != nullptr; | 
|  | 130 | } | 
|  | 131 |  | 
|  | 132 | /// Retrieve the existing vector value that corresponds to \p Key and | 
|  | 133 | /// \p Part. | 
|  | 134 | Value *getVectorValue(Value *Key, unsigned Part) { | 
|  | 135 | assert(hasVectorValue(Key, Part) && "Getting non-existent value."); | 
|  | 136 | return VectorMapStorage[Key][Part]; | 
|  | 137 | } | 
|  | 138 |  | 
|  | 139 | /// Retrieve the existing scalar value that corresponds to \p Key and | 
|  | 140 | /// \p Instance. | 
|  | 141 | Value *getScalarValue(Value *Key, const VPIteration &Instance) { | 
|  | 142 | assert(hasScalarValue(Key, Instance) && "Getting non-existent value."); | 
|  | 143 | return ScalarMapStorage[Key][Instance.Part][Instance.Lane]; | 
|  | 144 | } | 
|  | 145 |  | 
|  | 146 | /// Set a vector value associated with \p Key and \p Part. Assumes such a | 
|  | 147 | /// value is not already set. If it is, use resetVectorValue() instead. | 
|  | 148 | void setVectorValue(Value *Key, unsigned Part, Value *Vector) { | 
|  | 149 | assert(!hasVectorValue(Key, Part) && "Vector value already set for part"); | 
|  | 150 | if (!VectorMapStorage.count(Key)) { | 
|  | 151 | VectorParts Entry(UF); | 
|  | 152 | VectorMapStorage[Key] = Entry; | 
|  | 153 | } | 
|  | 154 | VectorMapStorage[Key][Part] = Vector; | 
|  | 155 | } | 
|  | 156 |  | 
|  | 157 | /// Set a scalar value associated with \p Key and \p Instance. Assumes such a | 
|  | 158 | /// value is not already set. | 
|  | 159 | void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) { | 
|  | 160 | assert(!hasScalarValue(Key, Instance) && "Scalar value already set"); | 
|  | 161 | if (!ScalarMapStorage.count(Key)) { | 
|  | 162 | ScalarParts Entry(UF); | 
|  | 163 | // TODO: Consider storing uniform values only per-part, as they occupy | 
|  | 164 | //       lane 0 only, keeping the other VF-1 redundant entries null. | 
|  | 165 | for (unsigned Part = 0; Part < UF; ++Part) | 
|  | 166 | Entry[Part].resize(VF, nullptr); | 
|  | 167 | ScalarMapStorage[Key] = Entry; | 
|  | 168 | } | 
|  | 169 | ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; | 
|  | 170 | } | 
|  | 171 |  | 
|  | 172 | /// Reset the vector value associated with \p Key for the given \p Part. | 
|  | 173 | /// This function can be used to update values that have already been | 
|  | 174 | /// vectorized. This is the case for "fix-up" operations including type | 
|  | 175 | /// truncation and the second phase of recurrence vectorization. | 
|  | 176 | void resetVectorValue(Value *Key, unsigned Part, Value *Vector) { | 
|  | 177 | assert(hasVectorValue(Key, Part) && "Vector value not set for part"); | 
|  | 178 | VectorMapStorage[Key][Part] = Vector; | 
|  | 179 | } | 
|  | 180 |  | 
|  | 181 | /// Reset the scalar value associated with \p Key for \p Part and \p Lane. | 
|  | 182 | /// This function can be used to update values that have already been | 
|  | 183 | /// scalarized. This is the case for "fix-up" operations including scalar phi | 
|  | 184 | /// nodes for scalarized and predicated instructions. | 
|  | 185 | void resetScalarValue(Value *Key, const VPIteration &Instance, | 
|  | 186 | Value *Scalar) { | 
|  | 187 | assert(hasScalarValue(Key, Instance) && | 
|  | 188 | "Scalar value not set for part and lane"); | 
|  | 189 | ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar; | 
|  | 190 | } | 
|  | 191 | }; | 
|  | 192 |  | 
|  | 193 | /// VPTransformState holds information passed down when "executing" a VPlan, | 
|  | 194 | /// needed for generating the output IR. | 
|  | 195 | struct VPTransformState { | 
|  | 196 |  | 
|  | 197 | VPTransformState(unsigned VF, unsigned UF, class LoopInfo *LI, | 
|  | 198 | class DominatorTree *DT, IRBuilder<> &Builder, | 
|  | 199 | VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV) | 
|  | 200 | : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), | 
|  | 201 | ValueMap(ValueMap), ILV(ILV) {} | 
|  | 202 |  | 
|  | 203 | /// The chosen Vectorization and Unroll Factors of the loop being vectorized. | 
|  | 204 | unsigned VF; | 
|  | 205 | unsigned UF; | 
|  | 206 |  | 
|  | 207 | /// Hold the indices to generate specific scalar instructions. Null indicates | 
|  | 208 | /// that all instances are to be generated, using either scalar or vector | 
|  | 209 | /// instructions. | 
|  | 210 | Optional<VPIteration> Instance; | 
|  | 211 |  | 
|  | 212 | /// Hold state information used when constructing the CFG of the output IR, | 
|  | 213 | /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. | 
|  | 214 | struct CFGState { | 
|  | 215 | /// The previous VPBasicBlock visited. Initially set to null. | 
|  | 216 | VPBasicBlock *PrevVPBB; | 
|  | 217 | /// The previous IR BasicBlock created or used. Initially set to the new | 
|  | 218 | /// header BasicBlock. | 
|  | 219 | BasicBlock *PrevBB; | 
|  | 220 | /// The last IR BasicBlock in the output IR. Set to the new latch | 
|  | 221 | /// BasicBlock, used for placing the newly created BasicBlocks. | 
|  | 222 | BasicBlock *LastBB; | 
|  | 223 | /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case | 
|  | 224 | /// of replication, maps the BasicBlock of the last replica created. | 
|  | 225 | SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB; | 
|  | 226 |  | 
|  | 227 | CFGState() : PrevVPBB(nullptr), PrevBB(nullptr), LastBB(nullptr) {} | 
|  | 228 | } CFG; | 
|  | 229 |  | 
|  | 230 | /// Hold a pointer to LoopInfo to register new basic blocks in the loop. | 
|  | 231 | class LoopInfo *LI; | 
|  | 232 |  | 
|  | 233 | /// Hold a pointer to Dominator Tree to register new basic blocks in the loop. | 
|  | 234 | class DominatorTree *DT; | 
|  | 235 |  | 
|  | 236 | /// Hold a reference to the IRBuilder used to generate output IR code. | 
|  | 237 | IRBuilder<> &Builder; | 
|  | 238 |  | 
|  | 239 | /// Hold a reference to the Value state information used when generating the | 
|  | 240 | /// Values of the output IR. | 
|  | 241 | VectorizerValueMap &ValueMap; | 
|  | 242 |  | 
|  | 243 | /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. | 
|  | 244 | class InnerLoopVectorizer *ILV; | 
|  | 245 | }; | 
|  | 246 |  | 
|  | 247 | /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. | 
|  | 248 | /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. | 
|  | 249 | class VPBlockBase { | 
|  | 250 | private: | 
|  | 251 | const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). | 
|  | 252 |  | 
|  | 253 | /// An optional name for the block. | 
|  | 254 | std::string Name; | 
|  | 255 |  | 
|  | 256 | /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if | 
|  | 257 | /// it is a topmost VPBlockBase. | 
|  | 258 | class VPRegionBlock *Parent; | 
|  | 259 |  | 
|  | 260 | /// List of predecessor blocks. | 
|  | 261 | SmallVector<VPBlockBase *, 1> Predecessors; | 
|  | 262 |  | 
|  | 263 | /// List of successor blocks. | 
|  | 264 | SmallVector<VPBlockBase *, 1> Successors; | 
|  | 265 |  | 
|  | 266 | /// Add \p Successor as the last successor to this block. | 
|  | 267 | void appendSuccessor(VPBlockBase *Successor) { | 
|  | 268 | assert(Successor && "Cannot add nullptr successor!"); | 
|  | 269 | Successors.push_back(Successor); | 
|  | 270 | } | 
|  | 271 |  | 
|  | 272 | /// Add \p Predecessor as the last predecessor to this block. | 
|  | 273 | void appendPredecessor(VPBlockBase *Predecessor) { | 
|  | 274 | assert(Predecessor && "Cannot add nullptr predecessor!"); | 
|  | 275 | Predecessors.push_back(Predecessor); | 
|  | 276 | } | 
|  | 277 |  | 
|  | 278 | /// Remove \p Predecessor from the predecessors of this block. | 
|  | 279 | void removePredecessor(VPBlockBase *Predecessor) { | 
|  | 280 | auto Pos = std::find(Predecessors.begin(), Predecessors.end(), Predecessor); | 
|  | 281 | assert(Pos && "Predecessor does not exist"); | 
|  | 282 | Predecessors.erase(Pos); | 
|  | 283 | } | 
|  | 284 |  | 
|  | 285 | /// Remove \p Successor from the successors of this block. | 
|  | 286 | void removeSuccessor(VPBlockBase *Successor) { | 
|  | 287 | auto Pos = std::find(Successors.begin(), Successors.end(), Successor); | 
|  | 288 | assert(Pos && "Successor does not exist"); | 
|  | 289 | Successors.erase(Pos); | 
|  | 290 | } | 
|  | 291 |  | 
|  | 292 | protected: | 
|  | 293 | VPBlockBase(const unsigned char SC, const std::string &N) | 
|  | 294 | : SubclassID(SC), Name(N), Parent(nullptr) {} | 
|  | 295 |  | 
|  | 296 | public: | 
|  | 297 | /// An enumeration for keeping track of the concrete subclass of VPBlockBase | 
|  | 298 | /// that are actually instantiated. Values of this enumeration are kept in the | 
|  | 299 | /// SubclassID field of the VPBlockBase objects. They are used for concrete | 
|  | 300 | /// type identification. | 
|  | 301 | typedef enum { VPBasicBlockSC, VPRegionBlockSC } VPBlockTy; | 
|  | 302 |  | 
|  | 303 | typedef SmallVectorImpl<VPBlockBase *> VPBlocksTy; | 
|  | 304 |  | 
|  | 305 | virtual ~VPBlockBase() {} | 
|  | 306 |  | 
|  | 307 | const std::string &getName() const { return Name; } | 
|  | 308 |  | 
|  | 309 | void setName(const Twine &newName) { Name = newName.str(); } | 
|  | 310 |  | 
|  | 311 | /// \return an ID for the concrete type of this object. | 
|  | 312 | /// This is used to implement the classof checks. This should not be used | 
|  | 313 | /// for any other purpose, as the values may change as LLVM evolves. | 
|  | 314 | unsigned getVPBlockID() const { return SubclassID; } | 
|  | 315 |  | 
|  | 316 | const VPRegionBlock *getParent() const { return Parent; } | 
|  | 317 |  | 
|  | 318 | void setParent(VPRegionBlock *P) { Parent = P; } | 
|  | 319 |  | 
|  | 320 | /// \return the VPBasicBlock that is the entry of this VPBlockBase, | 
|  | 321 | /// recursively, if the latter is a VPRegionBlock. Otherwise, if this | 
|  | 322 | /// VPBlockBase is a VPBasicBlock, it is returned. | 
|  | 323 | const VPBasicBlock *getEntryBasicBlock() const; | 
|  | 324 | VPBasicBlock *getEntryBasicBlock(); | 
|  | 325 |  | 
|  | 326 | /// \return the VPBasicBlock that is the exit of this VPBlockBase, | 
|  | 327 | /// recursively, if the latter is a VPRegionBlock. Otherwise, if this | 
|  | 328 | /// VPBlockBase is a VPBasicBlock, it is returned. | 
|  | 329 | const VPBasicBlock *getExitBasicBlock() const; | 
|  | 330 | VPBasicBlock *getExitBasicBlock(); | 
|  | 331 |  | 
|  | 332 | const VPBlocksTy &getSuccessors() const { return Successors; } | 
|  | 333 | VPBlocksTy &getSuccessors() { return Successors; } | 
|  | 334 |  | 
|  | 335 | const VPBlocksTy &getPredecessors() const { return Predecessors; } | 
|  | 336 | VPBlocksTy &getPredecessors() { return Predecessors; } | 
|  | 337 |  | 
|  | 338 | /// \return the successor of this VPBlockBase if it has a single successor. | 
|  | 339 | /// Otherwise return a null pointer. | 
|  | 340 | VPBlockBase *getSingleSuccessor() const { | 
|  | 341 | return (Successors.size() == 1 ? *Successors.begin() : nullptr); | 
|  | 342 | } | 
|  | 343 |  | 
|  | 344 | /// \return the predecessor of this VPBlockBase if it has a single | 
|  | 345 | /// predecessor. Otherwise return a null pointer. | 
|  | 346 | VPBlockBase *getSinglePredecessor() const { | 
|  | 347 | return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); | 
|  | 348 | } | 
|  | 349 |  | 
|  | 350 | /// An Enclosing Block of a block B is any block containing B, including B | 
|  | 351 | /// itself. \return the closest enclosing block starting from "this", which | 
|  | 352 | /// has successors. \return the root enclosing block if all enclosing blocks | 
|  | 353 | /// have no successors. | 
|  | 354 | VPBlockBase *getEnclosingBlockWithSuccessors(); | 
|  | 355 |  | 
|  | 356 | /// \return the closest enclosing block starting from "this", which has | 
|  | 357 | /// predecessors. \return the root enclosing block if all enclosing blocks | 
|  | 358 | /// have no predecessors. | 
|  | 359 | VPBlockBase *getEnclosingBlockWithPredecessors(); | 
|  | 360 |  | 
|  | 361 | /// \return the successors either attached directly to this VPBlockBase or, if | 
|  | 362 | /// this VPBlockBase is the exit block of a VPRegionBlock and has no | 
|  | 363 | /// successors of its own, search recursively for the first enclosing | 
|  | 364 | /// VPRegionBlock that has successors and return them. If no such | 
|  | 365 | /// VPRegionBlock exists, return the (empty) successors of the topmost | 
|  | 366 | /// VPBlockBase reached. | 
|  | 367 | const VPBlocksTy &getHierarchicalSuccessors() { | 
|  | 368 | return getEnclosingBlockWithSuccessors()->getSuccessors(); | 
|  | 369 | } | 
|  | 370 |  | 
|  | 371 | /// \return the hierarchical successor of this VPBlockBase if it has a single | 
|  | 372 | /// hierarchical successor. Otherwise return a null pointer. | 
|  | 373 | VPBlockBase *getSingleHierarchicalSuccessor() { | 
|  | 374 | return getEnclosingBlockWithSuccessors()->getSingleSuccessor(); | 
|  | 375 | } | 
|  | 376 |  | 
|  | 377 | /// \return the predecessors either attached directly to this VPBlockBase or, | 
|  | 378 | /// if this VPBlockBase is the entry block of a VPRegionBlock and has no | 
|  | 379 | /// predecessors of its own, search recursively for the first enclosing | 
|  | 380 | /// VPRegionBlock that has predecessors and return them. If no such | 
|  | 381 | /// VPRegionBlock exists, return the (empty) predecessors of the topmost | 
|  | 382 | /// VPBlockBase reached. | 
|  | 383 | const VPBlocksTy &getHierarchicalPredecessors() { | 
|  | 384 | return getEnclosingBlockWithPredecessors()->getPredecessors(); | 
|  | 385 | } | 
|  | 386 |  | 
|  | 387 | /// \return the hierarchical predecessor of this VPBlockBase if it has a | 
|  | 388 | /// single hierarchical predecessor. Otherwise return a null pointer. | 
|  | 389 | VPBlockBase *getSingleHierarchicalPredecessor() { | 
|  | 390 | return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); | 
|  | 391 | } | 
|  | 392 |  | 
|  | 393 | /// Sets a given VPBlockBase \p Successor as the single successor and \return | 
|  | 394 | /// \p Successor. The parent of this Block is copied to be the parent of | 
|  | 395 | /// \p Successor. | 
|  | 396 | VPBlockBase *setOneSuccessor(VPBlockBase *Successor) { | 
|  | 397 | assert(Successors.empty() && "Setting one successor when others exist."); | 
|  | 398 | appendSuccessor(Successor); | 
|  | 399 | Successor->appendPredecessor(this); | 
|  | 400 | Successor->Parent = Parent; | 
|  | 401 | return Successor; | 
|  | 402 | } | 
|  | 403 |  | 
|  | 404 | /// Sets two given VPBlockBases \p IfTrue and \p IfFalse to be the two | 
|  | 405 | /// successors. The parent of this Block is copied to be the parent of both | 
|  | 406 | /// \p IfTrue and \p IfFalse. | 
|  | 407 | void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) { | 
|  | 408 | assert(Successors.empty() && "Setting two successors when others exist."); | 
|  | 409 | appendSuccessor(IfTrue); | 
|  | 410 | appendSuccessor(IfFalse); | 
|  | 411 | IfTrue->appendPredecessor(this); | 
|  | 412 | IfFalse->appendPredecessor(this); | 
|  | 413 | IfTrue->Parent = Parent; | 
|  | 414 | IfFalse->Parent = Parent; | 
|  | 415 | } | 
|  | 416 |  | 
|  | 417 | void disconnectSuccessor(VPBlockBase *Successor) { | 
|  | 418 | assert(Successor && "Successor to disconnect is null."); | 
|  | 419 | removeSuccessor(Successor); | 
|  | 420 | Successor->removePredecessor(this); | 
|  | 421 | } | 
|  | 422 |  | 
|  | 423 | /// The method which generates the output IR that correspond to this | 
|  | 424 | /// VPBlockBase, thereby "executing" the VPlan. | 
|  | 425 | virtual void execute(struct VPTransformState *State) = 0; | 
|  | 426 |  | 
|  | 427 | /// Delete all blocks reachable from a given VPBlockBase, inclusive. | 
|  | 428 | static void deleteCFG(VPBlockBase *Entry); | 
|  | 429 | }; | 
|  | 430 |  | 
|  | 431 | /// VPRecipeBase is a base class modeling a sequence of one or more output IR | 
|  | 432 | /// instructions. | 
|  | 433 | class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> { | 
|  | 434 | friend VPBasicBlock; | 
|  | 435 |  | 
|  | 436 | private: | 
|  | 437 | const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). | 
|  | 438 |  | 
|  | 439 | /// Each VPRecipe belongs to a single VPBasicBlock. | 
|  | 440 | VPBasicBlock *Parent; | 
|  | 441 |  | 
|  | 442 | public: | 
|  | 443 | /// An enumeration for keeping track of the concrete subclass of VPRecipeBase | 
|  | 444 | /// that is actually instantiated. Values of this enumeration are kept in the | 
|  | 445 | /// SubclassID field of the VPRecipeBase objects. They are used for concrete | 
|  | 446 | /// type identification. | 
|  | 447 | typedef enum { | 
|  | 448 | VPBranchOnMaskSC, | 
|  | 449 | VPInterleaveSC, | 
|  | 450 | VPPredInstPHISC, | 
|  | 451 | VPReplicateSC, | 
|  | 452 | VPWidenIntOrFpInductionSC, | 
|  | 453 | VPWidenPHISC, | 
|  | 454 | VPWidenSC, | 
|  | 455 | } VPRecipeTy; | 
|  | 456 |  | 
|  | 457 | VPRecipeBase(const unsigned char SC) : SubclassID(SC), Parent(nullptr) {} | 
|  | 458 |  | 
|  | 459 | virtual ~VPRecipeBase() {} | 
|  | 460 |  | 
|  | 461 | /// \return an ID for the concrete type of this object. | 
|  | 462 | /// This is used to implement the classof checks. This should not be used | 
|  | 463 | /// for any other purpose, as the values may change as LLVM evolves. | 
|  | 464 | unsigned getVPRecipeID() const { return SubclassID; } | 
|  | 465 |  | 
|  | 466 | /// \return the VPBasicBlock which this VPRecipe belongs to. | 
|  | 467 | VPBasicBlock *getParent() { return Parent; } | 
|  | 468 | const VPBasicBlock *getParent() const { return Parent; } | 
|  | 469 |  | 
|  | 470 | /// The method which generates the output IR instructions that correspond to | 
|  | 471 | /// this VPRecipe, thereby "executing" the VPlan. | 
|  | 472 | virtual void execute(struct VPTransformState &State) = 0; | 
|  | 473 |  | 
|  | 474 | /// Each recipe prints itself. | 
|  | 475 | virtual void print(raw_ostream &O, const Twine &Indent) const = 0; | 
|  | 476 | }; | 
|  | 477 |  | 
|  | 478 | /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It | 
|  | 479 | /// holds a sequence of zero or more VPRecipe's each representing a sequence of | 
|  | 480 | /// output IR instructions. | 
|  | 481 | class VPBasicBlock : public VPBlockBase { | 
|  | 482 | public: | 
|  | 483 | typedef iplist<VPRecipeBase> RecipeListTy; | 
|  | 484 |  | 
|  | 485 | private: | 
|  | 486 | /// The VPRecipes held in the order of output instructions to generate. | 
|  | 487 | RecipeListTy Recipes; | 
|  | 488 |  | 
|  | 489 | public: | 
|  | 490 | /// Instruction iterators... | 
|  | 491 | typedef RecipeListTy::iterator iterator; | 
|  | 492 | typedef RecipeListTy::const_iterator const_iterator; | 
|  | 493 | typedef RecipeListTy::reverse_iterator reverse_iterator; | 
|  | 494 | typedef RecipeListTy::const_reverse_iterator const_reverse_iterator; | 
|  | 495 |  | 
|  | 496 | //===--------------------------------------------------------------------===// | 
|  | 497 | /// Recipe iterator methods | 
|  | 498 | /// | 
|  | 499 | inline iterator begin() { return Recipes.begin(); } | 
|  | 500 | inline const_iterator begin() const { return Recipes.begin(); } | 
|  | 501 | inline iterator end() { return Recipes.end(); } | 
|  | 502 | inline const_iterator end() const { return Recipes.end(); } | 
|  | 503 |  | 
|  | 504 | inline reverse_iterator rbegin() { return Recipes.rbegin(); } | 
|  | 505 | inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } | 
|  | 506 | inline reverse_iterator rend() { return Recipes.rend(); } | 
|  | 507 | inline const_reverse_iterator rend() const { return Recipes.rend(); } | 
|  | 508 |  | 
|  | 509 | inline size_t size() const { return Recipes.size(); } | 
|  | 510 | inline bool empty() const { return Recipes.empty(); } | 
|  | 511 | inline const VPRecipeBase &front() const { return Recipes.front(); } | 
|  | 512 | inline VPRecipeBase &front() { return Recipes.front(); } | 
|  | 513 | inline const VPRecipeBase &back() const { return Recipes.back(); } | 
|  | 514 | inline VPRecipeBase &back() { return Recipes.back(); } | 
|  | 515 |  | 
|  | 516 | /// \brief Returns a pointer to a member of the recipe list. | 
|  | 517 | static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) { | 
|  | 518 | return &VPBasicBlock::Recipes; | 
|  | 519 | } | 
|  | 520 |  | 
|  | 521 | VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) | 
|  | 522 | : VPBlockBase(VPBasicBlockSC, Name.str()) { | 
|  | 523 | if (Recipe) | 
|  | 524 | appendRecipe(Recipe); | 
|  | 525 | } | 
|  | 526 |  | 
|  | 527 | ~VPBasicBlock() { Recipes.clear(); } | 
|  | 528 |  | 
|  | 529 | /// Method to support type inquiry through isa, cast, and dyn_cast. | 
|  | 530 | static inline bool classof(const VPBlockBase *V) { | 
|  | 531 | return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; | 
|  | 532 | } | 
|  | 533 |  | 
|  | 534 | /// Augment the existing recipes of a VPBasicBlock with an additional | 
|  | 535 | /// \p Recipe as the last recipe. | 
|  | 536 | void appendRecipe(VPRecipeBase *Recipe) { | 
|  | 537 | assert(Recipe && "No recipe to append."); | 
|  | 538 | assert(!Recipe->Parent && "Recipe already in VPlan"); | 
|  | 539 | Recipe->Parent = this; | 
|  | 540 | return Recipes.push_back(Recipe); | 
|  | 541 | } | 
|  | 542 |  | 
|  | 543 | /// The method which generates the output IR instructions that correspond to | 
|  | 544 | /// this VPBasicBlock, thereby "executing" the VPlan. | 
|  | 545 | void execute(struct VPTransformState *State) override; | 
|  | 546 |  | 
|  | 547 | private: | 
|  | 548 | /// Create an IR BasicBlock to hold the output instructions generated by this | 
|  | 549 | /// VPBasicBlock, and return it. Update the CFGState accordingly. | 
|  | 550 | BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG); | 
|  | 551 | }; | 
|  | 552 |  | 
|  | 553 | /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks | 
|  | 554 | /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG. | 
|  | 555 | /// A VPRegionBlock may indicate that its contents are to be replicated several | 
|  | 556 | /// times. This is designed to support predicated scalarization, in which a | 
|  | 557 | /// scalar if-then code structure needs to be generated VF * UF times. Having | 
|  | 558 | /// this replication indicator helps to keep a single model for multiple | 
|  | 559 | /// candidate VF's. The actual replication takes place only once the desired VF | 
|  | 560 | /// and UF have been determined. | 
|  | 561 | class VPRegionBlock : public VPBlockBase { | 
|  | 562 | private: | 
|  | 563 | /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. | 
|  | 564 | VPBlockBase *Entry; | 
|  | 565 |  | 
|  | 566 | /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock. | 
|  | 567 | VPBlockBase *Exit; | 
|  | 568 |  | 
|  | 569 | /// An indicator whether this region is to generate multiple replicated | 
|  | 570 | /// instances of output IR corresponding to its VPBlockBases. | 
|  | 571 | bool IsReplicator; | 
|  | 572 |  | 
|  | 573 | public: | 
|  | 574 | VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit, | 
|  | 575 | const std::string &Name = "", bool IsReplicator = false) | 
|  | 576 | : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit), | 
|  | 577 | IsReplicator(IsReplicator) { | 
|  | 578 | assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); | 
|  | 579 | assert(Exit->getSuccessors().empty() && "Exit block has successors."); | 
|  | 580 | Entry->setParent(this); | 
|  | 581 | Exit->setParent(this); | 
|  | 582 | } | 
|  | 583 |  | 
|  | 584 | ~VPRegionBlock() { | 
|  | 585 | if (Entry) | 
|  | 586 | deleteCFG(Entry); | 
|  | 587 | } | 
|  | 588 |  | 
|  | 589 | /// Method to support type inquiry through isa, cast, and dyn_cast. | 
|  | 590 | static inline bool classof(const VPBlockBase *V) { | 
|  | 591 | return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC; | 
|  | 592 | } | 
|  | 593 |  | 
|  | 594 | const VPBlockBase *getEntry() const { return Entry; } | 
|  | 595 | VPBlockBase *getEntry() { return Entry; } | 
|  | 596 |  | 
|  | 597 | const VPBlockBase *getExit() const { return Exit; } | 
|  | 598 | VPBlockBase *getExit() { return Exit; } | 
|  | 599 |  | 
|  | 600 | /// An indicator whether this region is to generate multiple replicated | 
|  | 601 | /// instances of output IR corresponding to its VPBlockBases. | 
|  | 602 | bool isReplicator() const { return IsReplicator; } | 
|  | 603 |  | 
|  | 604 | /// The method which generates the output IR instructions that correspond to | 
|  | 605 | /// this VPRegionBlock, thereby "executing" the VPlan. | 
|  | 606 | void execute(struct VPTransformState *State) override; | 
|  | 607 | }; | 
|  | 608 |  | 
|  | 609 | /// VPlan models a candidate for vectorization, encoding various decisions take | 
|  | 610 | /// to produce efficient output IR, including which branches, basic-blocks and | 
|  | 611 | /// output IR instructions to generate, and their cost. VPlan holds a | 
|  | 612 | /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry | 
|  | 613 | /// VPBlock. | 
|  | 614 | class VPlan { | 
|  | 615 | private: | 
|  | 616 | /// Hold the single entry to the Hierarchical CFG of the VPlan. | 
|  | 617 | VPBlockBase *Entry; | 
|  | 618 |  | 
|  | 619 | /// Holds the VFs applicable to this VPlan. | 
|  | 620 | SmallSet<unsigned, 2> VFs; | 
|  | 621 |  | 
|  | 622 | /// Holds the name of the VPlan, for printing. | 
|  | 623 | std::string Name; | 
|  | 624 |  | 
|  | 625 | public: | 
|  | 626 | VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {} | 
|  | 627 |  | 
|  | 628 | ~VPlan() { | 
|  | 629 | if (Entry) | 
|  | 630 | VPBlockBase::deleteCFG(Entry); | 
|  | 631 | } | 
|  | 632 |  | 
|  | 633 | /// Generate the IR code for this VPlan. | 
|  | 634 | void execute(struct VPTransformState *State); | 
|  | 635 |  | 
|  | 636 | VPBlockBase *getEntry() { return Entry; } | 
|  | 637 | const VPBlockBase *getEntry() const { return Entry; } | 
|  | 638 |  | 
|  | 639 | VPBlockBase *setEntry(VPBlockBase *Block) { return Entry = Block; } | 
|  | 640 |  | 
|  | 641 | void addVF(unsigned VF) { VFs.insert(VF); } | 
|  | 642 |  | 
|  | 643 | bool hasVF(unsigned VF) { return VFs.count(VF); } | 
|  | 644 |  | 
|  | 645 | const std::string &getName() const { return Name; } | 
|  | 646 |  | 
|  | 647 | void setName(const Twine &newName) { Name = newName.str(); } | 
|  | 648 |  | 
|  | 649 | private: | 
|  | 650 | /// Add to the given dominator tree the header block and every new basic block | 
|  | 651 | /// that was created between it and the latch block, inclusive. | 
|  | 652 | static void updateDominatorTree(class DominatorTree *DT, | 
|  | 653 | BasicBlock *LoopPreHeaderBB, | 
|  | 654 | BasicBlock *LoopLatchBB); | 
|  | 655 | }; | 
|  | 656 |  | 
|  | 657 | /// VPlanPrinter prints a given VPlan to a given output stream. The printing is | 
|  | 658 | /// indented and follows the dot format. | 
|  | 659 | class VPlanPrinter { | 
|  | 660 | friend inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan); | 
|  | 661 | friend inline raw_ostream &operator<<(raw_ostream &OS, | 
|  | 662 | const struct VPlanIngredient &I); | 
|  | 663 |  | 
|  | 664 | private: | 
|  | 665 | raw_ostream &OS; | 
|  | 666 | VPlan &Plan; | 
|  | 667 | unsigned Depth; | 
|  | 668 | unsigned TabWidth = 2; | 
|  | 669 | std::string Indent; | 
|  | 670 |  | 
|  | 671 | unsigned BID = 0; | 
|  | 672 |  | 
|  | 673 | SmallDenseMap<const VPBlockBase *, unsigned> BlockID; | 
|  | 674 |  | 
|  | 675 | /// Handle indentation. | 
|  | 676 | void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } | 
|  | 677 |  | 
|  | 678 | /// Print a given \p Block of the Plan. | 
|  | 679 | void dumpBlock(const VPBlockBase *Block); | 
|  | 680 |  | 
|  | 681 | /// Print the information related to the CFG edges going out of a given | 
|  | 682 | /// \p Block, followed by printing the successor blocks themselves. | 
|  | 683 | void dumpEdges(const VPBlockBase *Block); | 
|  | 684 |  | 
|  | 685 | /// Print a given \p BasicBlock, including its VPRecipes, followed by printing | 
|  | 686 | /// its successor blocks. | 
|  | 687 | void dumpBasicBlock(const VPBasicBlock *BasicBlock); | 
|  | 688 |  | 
|  | 689 | /// Print a given \p Region of the Plan. | 
|  | 690 | void dumpRegion(const VPRegionBlock *Region); | 
|  | 691 |  | 
|  | 692 | unsigned getOrCreateBID(const VPBlockBase *Block) { | 
|  | 693 | return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++; | 
|  | 694 | } | 
|  | 695 |  | 
|  | 696 | const Twine getOrCreateName(const VPBlockBase *Block); | 
|  | 697 |  | 
|  | 698 | const Twine getUID(const VPBlockBase *Block); | 
|  | 699 |  | 
|  | 700 | /// Print the information related to a CFG edge between two VPBlockBases. | 
|  | 701 | void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, | 
|  | 702 | const Twine &Label); | 
|  | 703 |  | 
|  | 704 | VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {} | 
|  | 705 |  | 
|  | 706 | void dump(); | 
|  | 707 |  | 
|  | 708 | static void printAsIngredient(raw_ostream &O, Value *V); | 
|  | 709 | }; | 
|  | 710 |  | 
|  | 711 | struct VPlanIngredient { | 
|  | 712 | Value *V; | 
|  | 713 | VPlanIngredient(Value *V) : V(V) {} | 
|  | 714 | }; | 
|  | 715 |  | 
|  | 716 | inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { | 
|  | 717 | VPlanPrinter::printAsIngredient(OS, I.V); | 
|  | 718 | return OS; | 
|  | 719 | } | 
|  | 720 |  | 
|  | 721 | inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { | 
|  | 722 | VPlanPrinter Printer(OS, Plan); | 
|  | 723 | Printer.dump(); | 
|  | 724 | return OS; | 
|  | 725 | } | 
|  | 726 |  | 
|  | 727 | //===--------------------------------------------------------------------===// | 
|  | 728 | // GraphTraits specializations for VPlan/VPRegionBlock Control-Flow Graphs  // | 
|  | 729 | //===--------------------------------------------------------------------===// | 
|  | 730 |  | 
|  | 731 | // Provide specializations of GraphTraits to be able to treat a VPBlockBase as a | 
|  | 732 | // graph of VPBlockBase nodes... | 
|  | 733 |  | 
|  | 734 | template <> struct GraphTraits<VPBlockBase *> { | 
|  | 735 | typedef VPBlockBase *NodeRef; | 
|  | 736 | typedef SmallVectorImpl<VPBlockBase *>::iterator ChildIteratorType; | 
|  | 737 |  | 
|  | 738 | static NodeRef getEntryNode(NodeRef N) { return N; } | 
|  | 739 |  | 
|  | 740 | static inline ChildIteratorType child_begin(NodeRef N) { | 
|  | 741 | return N->getSuccessors().begin(); | 
|  | 742 | } | 
|  | 743 |  | 
|  | 744 | static inline ChildIteratorType child_end(NodeRef N) { | 
|  | 745 | return N->getSuccessors().end(); | 
|  | 746 | } | 
|  | 747 | }; | 
|  | 748 |  | 
|  | 749 | template <> struct GraphTraits<const VPBlockBase *> { | 
|  | 750 | typedef const VPBlockBase *NodeRef; | 
|  | 751 | typedef SmallVectorImpl<VPBlockBase *>::const_iterator ChildIteratorType; | 
|  | 752 |  | 
|  | 753 | static NodeRef getEntryNode(NodeRef N) { return N; } | 
|  | 754 |  | 
|  | 755 | static inline ChildIteratorType child_begin(NodeRef N) { | 
|  | 756 | return N->getSuccessors().begin(); | 
|  | 757 | } | 
|  | 758 |  | 
|  | 759 | static inline ChildIteratorType child_end(NodeRef N) { | 
|  | 760 | return N->getSuccessors().end(); | 
|  | 761 | } | 
|  | 762 | }; | 
|  | 763 |  | 
|  | 764 | // Provide specializations of GraphTraits to be able to treat a VPBlockBase as a | 
|  | 765 | // graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order | 
|  | 766 | // for a VPBlockBase is considered to be when traversing the predecessors of a | 
|  | 767 | // VPBlockBase instead of its successors. | 
|  | 768 | // | 
|  | 769 |  | 
|  | 770 | template <> struct GraphTraits<Inverse<VPBlockBase *>> { | 
|  | 771 | typedef VPBlockBase *NodeRef; | 
|  | 772 | typedef SmallVectorImpl<VPBlockBase *>::iterator ChildIteratorType; | 
|  | 773 |  | 
|  | 774 | static Inverse<VPBlockBase *> getEntryNode(Inverse<VPBlockBase *> B) { | 
|  | 775 | return B; | 
|  | 776 | } | 
|  | 777 |  | 
|  | 778 | static inline ChildIteratorType child_begin(NodeRef N) { | 
|  | 779 | return N->getPredecessors().begin(); | 
|  | 780 | } | 
|  | 781 |  | 
|  | 782 | static inline ChildIteratorType child_end(NodeRef N) { | 
|  | 783 | return N->getPredecessors().end(); | 
|  | 784 | } | 
|  | 785 | }; | 
|  | 786 |  | 
|  | 787 | } // namespace llvm | 
|  | 788 |  | 
|  | 789 | #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H |