Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 1 | //===- VecUtils.h --- Vectorization Utilities -----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 9 | #define DEBUG_TYPE "SLP" |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 10 | |
| 11 | #include "VecUtils.h" |
| 12 | #include "llvm/ADT/DenseMap.h" |
| 13 | #include "llvm/ADT/SmallPtrSet.h" |
| 14 | #include "llvm/ADT/SmallSet.h" |
| 15 | #include "llvm/ADT/SmallVector.h" |
| 16 | #include "llvm/Analysis/AliasAnalysis.h" |
| 17 | #include "llvm/Analysis/ScalarEvolution.h" |
| 18 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| 19 | #include "llvm/Analysis/TargetTransformInfo.h" |
| 20 | #include "llvm/Analysis/Verifier.h" |
| 21 | #include "llvm/IR/Constants.h" |
| 22 | #include "llvm/IR/DataLayout.h" |
| 23 | #include "llvm/IR/Function.h" |
| 24 | #include "llvm/IR/Instructions.h" |
| 25 | #include "llvm/IR/Module.h" |
| 26 | #include "llvm/IR/Type.h" |
| 27 | #include "llvm/IR/Value.h" |
| 28 | #include "llvm/Pass.h" |
| 29 | #include "llvm/Support/CommandLine.h" |
| 30 | #include "llvm/Support/Debug.h" |
| 31 | #include "llvm/Support/raw_ostream.h" |
| 32 | #include "llvm/Target/TargetLibraryInfo.h" |
| 33 | #include "llvm/Transforms/Scalar.h" |
| 34 | #include "llvm/Transforms/Utils/Local.h" |
| 35 | #include <algorithm> |
| 36 | #include <map> |
| 37 | |
| 38 | using namespace llvm; |
| 39 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 40 | static const unsigned MinVecRegSize = 128; |
| 41 | |
| 42 | static const unsigned RecursionMaxDepth = 6; |
| 43 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 44 | namespace llvm { |
| 45 | |
| 46 | BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, |
| 47 | TargetTransformInfo *Tti, AliasAnalysis *Aa) : |
| 48 | BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) { |
| 49 | numberInstructions(); |
| 50 | } |
| 51 | |
| 52 | void BoUpSLP::numberInstructions() { |
| 53 | int Loc = 0; |
| 54 | InstrIdx.clear(); |
| 55 | InstrVec.clear(); |
| 56 | // Number the instructions in the block. |
| 57 | for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { |
| 58 | InstrIdx[it] = Loc++; |
| 59 | InstrVec.push_back(it); |
| 60 | assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | Value *BoUpSLP::getPointerOperand(Value *I) { |
| 65 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); |
| 66 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); |
| 67 | return 0; |
| 68 | } |
| 69 | |
| 70 | unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { |
| 71 | if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); |
| 72 | if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); |
| 73 | return -1; |
| 74 | } |
| 75 | |
| 76 | bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { |
| 77 | Value *PtrA = getPointerOperand(A); |
| 78 | Value *PtrB = getPointerOperand(B); |
| 79 | unsigned ASA = getAddressSpaceOperand(A); |
| 80 | unsigned ASB = getAddressSpaceOperand(B); |
| 81 | |
| 82 | // Check that the address spaces match and that the pointers are valid. |
| 83 | if (!PtrA || !PtrB || (ASA != ASB)) return false; |
| 84 | |
| 85 | // Check that A and B are of the same type. |
| 86 | if (PtrA->getType() != PtrB->getType()) return false; |
| 87 | |
| 88 | // Calculate the distance. |
| 89 | const SCEV *PtrSCEVA = SE->getSCEV(PtrA); |
| 90 | const SCEV *PtrSCEVB = SE->getSCEV(PtrB); |
| 91 | const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB); |
| 92 | const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV); |
| 93 | |
| 94 | // Non constant distance. |
| 95 | if (!ConstOffSCEV) return false; |
| 96 | |
| 97 | unsigned Offset = ConstOffSCEV->getValue()->getSExtValue(); |
| 98 | Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); |
| 99 | // The Instructions are connsecutive if the size of the first load/store is |
| 100 | // the same as the offset. |
Nadav Rotem | 88dd5f7 | 2013-04-10 18:57:27 +0000 | [diff] [blame] | 101 | unsigned Sz = DL->getTypeStoreSize(Ty); |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 102 | return ((-Offset) == Sz); |
| 103 | } |
| 104 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 105 | bool BoUpSLP::vectorizeStoreChain(ValueList &Chain, int CostThreshold) { |
| 106 | Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); |
| 107 | unsigned Sz = DL->getTypeSizeInBits(StoreTy); |
| 108 | unsigned VF = MinVecRegSize / Sz; |
| 109 | |
| 110 | if (!isPowerOf2_32(Sz) || VF < 2) return false; |
| 111 | |
| 112 | bool Changed = false; |
Nadav Rotem | 3403c11 | 2013-04-14 07:22:22 +0000 | [diff] [blame] | 113 | // Look for profitable vectorizable trees at all offsets, starting at zero. |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 114 | for (unsigned i = 0, e = Chain.size(); i < e; ++i) { |
| 115 | if (i + VF > e) return Changed; |
| 116 | DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); |
| 117 | ValueList Operands(&Chain[i], &Chain[i] + VF); |
| 118 | |
| 119 | int Cost = getTreeCost(Operands); |
| 120 | DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); |
| 121 | if (Cost < CostThreshold) { |
| 122 | DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); |
| 123 | vectorizeTree(Operands, VF); |
| 124 | i += VF; |
| 125 | Changed = true; |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | return Changed; |
| 130 | } |
| 131 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 132 | bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) { |
| 133 | ValueSet Heads, Tails; |
| 134 | SmallDenseMap<Value*, Value*> ConsecutiveChain; |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 135 | |
| 136 | // We may run into multiple chains that merge into a single chain. We mark the |
| 137 | // stores that we vectorized so that we don't visit the same store twice. |
| 138 | ValueSet VectorizedStores; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 139 | bool Changed = false; |
| 140 | |
| 141 | // Do a quadratic search on all of the given stores and find |
| 142 | // all of the pairs of loads that follow each other. |
| 143 | for (unsigned i = 0, e = Stores.size(); i < e; ++i) |
| 144 | for (unsigned j = 0; j < e; ++j) { |
| 145 | if (i == j) continue; |
| 146 | if (isConsecutiveAccess(Stores[i], Stores[j])) { |
| 147 | Tails.insert(Stores[j]); |
| 148 | Heads.insert(Stores[i]); |
| 149 | ConsecutiveChain[Stores[i]] = Stores[j]; |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | // For stores that start but don't end a link in the chain: |
| 154 | for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) { |
| 155 | if (Tails.count(*it)) continue; |
| 156 | |
| 157 | // We found a store instr that starts a chain. Now follow the chain and try |
| 158 | // to vectorize it. |
| 159 | ValueList Operands; |
| 160 | Value *I = *it; |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 161 | // Collect the chain into a list. |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 162 | while (Tails.count(I) || Heads.count(I)) { |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 163 | if (VectorizedStores.count(I)) break; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 164 | Operands.push_back(I); |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 165 | // Move to the next value in the chain. |
| 166 | I = ConsecutiveChain[I]; |
| 167 | } |
| 168 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 169 | bool Vectorized = vectorizeStoreChain(Operands, costThreshold); |
Nadav Rotem | 3403c11 | 2013-04-14 07:22:22 +0000 | [diff] [blame] | 170 | |
| 171 | // Mark the vectorized stores so that we don't vectorize them again. |
| 172 | if (Vectorized) |
| 173 | VectorizedStores.insert(Operands.begin(), Operands.end()); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 174 | Changed |= Vectorized; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 175 | } |
| 176 | |
| 177 | return Changed; |
| 178 | } |
| 179 | |
Nadav Rotem | 54b413d | 2013-04-14 05:15:53 +0000 | [diff] [blame] | 180 | int BoUpSLP::getScalarizationCost(ValueList &VL) { |
Nadav Rotem | 3403c11 | 2013-04-14 07:22:22 +0000 | [diff] [blame] | 181 | // Find the type of the operands in VL. |
Nadav Rotem | 54b413d | 2013-04-14 05:15:53 +0000 | [diff] [blame] | 182 | Type *ScalarTy = VL[0]->getType(); |
Nadav Rotem | 54b413d | 2013-04-14 05:15:53 +0000 | [diff] [blame] | 183 | if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) |
| 184 | ScalarTy = SI->getValueOperand()->getType(); |
Nadav Rotem | 54b413d | 2013-04-14 05:15:53 +0000 | [diff] [blame] | 185 | VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); |
Nadav Rotem | 3403c11 | 2013-04-14 07:22:22 +0000 | [diff] [blame] | 186 | // Find the cost of inserting/extracting values from the vector. |
Nadav Rotem | 54b413d | 2013-04-14 05:15:53 +0000 | [diff] [blame] | 187 | return getScalarizationCost(VecTy); |
| 188 | } |
| 189 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 190 | int BoUpSLP::getScalarizationCost(Type *Ty) { |
| 191 | int Cost = 0; |
| 192 | for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) |
| 193 | Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
| 194 | return Cost; |
| 195 | } |
| 196 | |
| 197 | AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { |
| 198 | if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); |
| 199 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); |
| 200 | return AliasAnalysis::Location(); |
| 201 | } |
| 202 | |
| 203 | Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { |
| 204 | assert(Src->getParent() == Dst->getParent() && "Not the same BB"); |
| 205 | BasicBlock::iterator I = Src, E = Dst; |
| 206 | /// Scan all of the instruction from SRC to DST and check if |
| 207 | /// the source may alias. |
| 208 | for (++I; I != E; ++I) { |
| 209 | // Ignore store instructions that are marked as 'ignore'. |
| 210 | if (MemBarrierIgnoreList.count(I)) continue; |
| 211 | if (Src->mayWriteToMemory()) /* Write */ { |
| 212 | if (!I->mayReadOrWriteMemory()) continue; |
| 213 | } else /* Read */ { |
| 214 | if (!I->mayWriteToMemory()) continue; |
| 215 | } |
| 216 | AliasAnalysis::Location A = getLocation(&*I); |
| 217 | AliasAnalysis::Location B = getLocation(Src); |
| 218 | |
| 219 | if (!A.Ptr || !B.Ptr || AA->alias(A, B)) |
| 220 | return I; |
| 221 | } |
| 222 | return 0; |
| 223 | } |
| 224 | |
Nadav Rotem | 0b9cf85 | 2013-04-14 03:22:20 +0000 | [diff] [blame] | 225 | void BoUpSLP::vectorizeArith(ValueList &Operands) { |
| 226 | Value *Vec = vectorizeTree(Operands, Operands.size()); |
| 227 | BasicBlock::iterator Loc = cast<Instruction>(Vec); |
| 228 | IRBuilder<> Builder(++Loc); |
Nadav Rotem | 3403c11 | 2013-04-14 07:22:22 +0000 | [diff] [blame] | 229 | // After vectorizing the operands we need to generate extractelement |
| 230 | // instructions and replace all of the uses of the scalar values with |
| 231 | // the values that we extracted from the vectorized tree. |
Nadav Rotem | 0b9cf85 | 2013-04-14 03:22:20 +0000 | [diff] [blame] | 232 | for (unsigned i = 0, e = Operands.size(); i != e; ++i) { |
| 233 | Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i)); |
| 234 | Operands[i]->replaceAllUsesWith(S); |
| 235 | } |
| 236 | } |
| 237 | |
/// Entry point for cost estimation of the tree rooted at \p VL: resets the
/// per-query state, records lane usage and multi-user values via
/// getTreeUses_rec, marks values that must be scalarized, and returns the
/// cost computed by getTreeCost_rec.
int BoUpSLP::getTreeCost(ValueList &VL) {
  // Get rid of the list of stores that were removed, and from the
  // lists of instructions with multiple users.
  MemBarrierIgnoreList.clear();
  LaneMap.clear();
  MultiUserVals.clear();
  MustScalarize.clear();

  // Scan the tree and find which value is used by which lane, and which values
  // must be scalarized.
  getTreeUses_rec(VL, 0);

  // Check that instructions with multiple users can be vectorized. Mark unsafe
  // instructions.
  for (ValueSet::iterator it = MultiUserVals.begin(),
       e = MultiUserVals.end(); it != e; ++it) {
    // Check that all of the users of this instr are within the tree
    // and that they are all from the same lane.
    int Lane = -1;  // -1 means no lane has been seen yet for this value.
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
         I != E; ++I) {
      // A user that was never visited by getTreeUses_rec lies outside the
      // tree, so this value must remain scalar.
      if (LaneMap.find(*I) == LaneMap.end()) {
        MustScalarize.insert(*it);
        DEBUG(dbgs()<<"SLP: Adding " << **it <<
              " to MustScalarize because of an out of tree usage.\n");
        break;
      }
      // Remember the first lane seen; any user from a different lane makes
      // the value unsafe to vectorize.
      if (Lane == -1) Lane = LaneMap[*I];
      if (Lane != LaneMap[*I]) {
        MustScalarize.insert(*it);
        DEBUG(dbgs()<<"Adding " << **it <<
              " to MustScalarize because multiple lane use it: "
              << Lane << " and " << LaneMap[*I] << ".\n");
        break;
      }
    }
  }

  // Now calculate the cost of vectorizing the tree.
  return getTreeCost_rec(VL, 0);
}
| 279 | |
| 280 | void BoUpSLP::getTreeUses_rec(ValueList &VL, unsigned Depth) { |
| 281 | if (Depth == RecursionMaxDepth) return; |
| 282 | |
| 283 | // Don't handle vectors. |
| 284 | if (VL[0]->getType()->isVectorTy()) return; |
| 285 | if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) |
| 286 | if (SI->getValueOperand()->getType()->isVectorTy()) return; |
| 287 | |
| 288 | // Check if all of the operands are constants. |
| 289 | bool AllConst = true; |
| 290 | bool AllSameScalar = true; |
| 291 | for (unsigned i = 0, e = VL.size(); i < e; ++i) { |
| 292 | AllConst &= isa<Constant>(VL[i]); |
| 293 | AllSameScalar &= (VL[0] == VL[i]); |
| 294 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
| 295 | // If one of the instructions is out of this BB, we need to scalarize all. |
| 296 | if (I && I->getParent() != BB) return; |
| 297 | } |
| 298 | |
| 299 | // If all of the operands are identical or constant we have a simple solution. |
| 300 | if (AllConst || AllSameScalar) return; |
| 301 | |
| 302 | // Scalarize unknown structures. |
| 303 | Instruction *VL0 = dyn_cast<Instruction>(VL[0]); |
| 304 | if (!VL0) return; |
| 305 | |
| 306 | unsigned Opcode = VL0->getOpcode(); |
| 307 | for (unsigned i = 0, e = VL.size(); i < e; ++i) { |
| 308 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
| 309 | // If not all of the instructions are identical then we have to scalarize. |
| 310 | if (!I || Opcode != I->getOpcode()) return; |
| 311 | } |
| 312 | |
| 313 | // Mark instructions with multiple users. |
| 314 | for (unsigned i = 0, e = VL.size(); i < e; ++i) { |
| 315 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
| 316 | // Remember to check if all of the users of this instr are vectorized |
| 317 | // within our tree. |
| 318 | if (I && I->getNumUses() > 1) MultiUserVals.insert(I); |
| 319 | } |
| 320 | |
| 321 | for (int i = 0, e = VL.size(); i < e; ++i) { |
| 322 | // Check that the instruction is only used within |
| 323 | // one lane. |
| 324 | if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; |
| 325 | // Make this instruction as 'seen' and remember the lane. |
| 326 | LaneMap[VL[i]] = i; |
| 327 | } |
| 328 | |
| 329 | switch (Opcode) { |
| 330 | case Instruction::Add: |
| 331 | case Instruction::FAdd: |
| 332 | case Instruction::Sub: |
| 333 | case Instruction::FSub: |
| 334 | case Instruction::Mul: |
| 335 | case Instruction::FMul: |
| 336 | case Instruction::UDiv: |
| 337 | case Instruction::SDiv: |
| 338 | case Instruction::FDiv: |
| 339 | case Instruction::URem: |
| 340 | case Instruction::SRem: |
| 341 | case Instruction::FRem: |
| 342 | case Instruction::Shl: |
| 343 | case Instruction::LShr: |
| 344 | case Instruction::AShr: |
| 345 | case Instruction::And: |
| 346 | case Instruction::Or: |
| 347 | case Instruction::Xor: { |
| 348 | for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { |
| 349 | ValueList Operands; |
| 350 | // Prepare the operand vector. |
| 351 | for (unsigned j = 0; j < VL.size(); ++j) |
| 352 | Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); |
| 353 | |
| 354 | getTreeUses_rec(Operands, Depth+1); |
| 355 | } |
| 356 | } |
| 357 | case Instruction::Store: { |
| 358 | ValueList Operands; |
| 359 | for (unsigned j = 0; j < VL.size(); ++j) |
| 360 | Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); |
| 361 | getTreeUses_rec(Operands, Depth+1); |
| 362 | return; |
| 363 | } |
| 364 | default: |
| 365 | return; |
| 366 | } |
| 367 | } |
| 368 | |
| 369 | int BoUpSLP::getTreeCost_rec(ValueList &VL, unsigned Depth) { |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 370 | Type *ScalarTy = VL[0]->getType(); |
| 371 | |
| 372 | if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) |
| 373 | ScalarTy = SI->getValueOperand()->getType(); |
| 374 | |
| 375 | /// Don't mess with vectors. |
| 376 | if (ScalarTy->isVectorTy()) return max_cost; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 377 | VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); |
| 378 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 379 | if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); |
| 380 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 381 | // Check if all of the operands are constants. |
| 382 | bool AllConst = true; |
| 383 | bool AllSameScalar = true; |
| 384 | for (unsigned i = 0, e = VL.size(); i < e; ++i) { |
| 385 | AllConst &= isa<Constant>(VL[i]); |
| 386 | AllSameScalar &= (VL[0] == VL[i]); |
| 387 | // Must have a single use. |
| 388 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 389 | // This instruction is outside the basic block or if it is a known hazard. |
| 390 | if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 391 | return getScalarizationCost(VecTy); |
| 392 | } |
| 393 | |
| 394 | // Is this a simple vector constant. |
| 395 | if (AllConst) return 0; |
| 396 | |
| 397 | // If all of the operands are identical we can broadcast them. |
| 398 | if (AllSameScalar) |
| 399 | return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); |
| 400 | |
| 401 | // Scalarize unknown structures. |
| 402 | Instruction *VL0 = dyn_cast<Instruction>(VL[0]); |
| 403 | if (!VL0) return getScalarizationCost(VecTy); |
| 404 | assert(VL0->getParent() == BB && "Wrong BB"); |
| 405 | |
| 406 | unsigned Opcode = VL0->getOpcode(); |
| 407 | for (unsigned i = 0, e = VL.size(); i < e; ++i) { |
| 408 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
| 409 | // If not all of the instructions are identical then we have to scalarize. |
| 410 | if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); |
| 411 | } |
| 412 | |
| 413 | // Check if it is safe to sink the loads or the stores. |
| 414 | if (Opcode == Instruction::Load || Opcode == Instruction::Store) { |
| 415 | int MaxIdx = InstrIdx[VL0]; |
| 416 | for (unsigned i = 1, e = VL.size(); i < e; ++i ) |
| 417 | MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); |
| 418 | |
| 419 | Instruction *Last = InstrVec[MaxIdx]; |
| 420 | for (unsigned i = 0, e = VL.size(); i < e; ++i ) { |
| 421 | if (VL[i] == Last) continue; |
| 422 | Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last); |
| 423 | if (Barrier) { |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 424 | DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 425 | *Last << "\n because of " << *Barrier << "\n"); |
| 426 | return max_cost; |
| 427 | } |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | switch (Opcode) { |
| 432 | case Instruction::Add: |
| 433 | case Instruction::FAdd: |
| 434 | case Instruction::Sub: |
| 435 | case Instruction::FSub: |
| 436 | case Instruction::Mul: |
| 437 | case Instruction::FMul: |
| 438 | case Instruction::UDiv: |
| 439 | case Instruction::SDiv: |
| 440 | case Instruction::FDiv: |
| 441 | case Instruction::URem: |
| 442 | case Instruction::SRem: |
| 443 | case Instruction::FRem: |
| 444 | case Instruction::Shl: |
| 445 | case Instruction::LShr: |
| 446 | case Instruction::AShr: |
| 447 | case Instruction::And: |
| 448 | case Instruction::Or: |
| 449 | case Instruction::Xor: { |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 450 | int Cost = 0; |
| 451 | // Calculate the cost of all of the operands. |
| 452 | for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 453 | ValueList Operands; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 454 | // Prepare the operand vector. |
| 455 | for (unsigned j = 0; j < VL.size(); ++j) |
| 456 | Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 457 | |
| 458 | Cost += getTreeCost_rec(Operands, Depth+1); |
| 459 | if (Cost >= max_cost) return max_cost; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 460 | } |
| 461 | |
| 462 | // Calculate the cost of this instruction. |
| 463 | int ScalarCost = VecTy->getNumElements() * |
| 464 | TTI->getArithmeticInstrCost(Opcode, ScalarTy); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 465 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 466 | int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); |
| 467 | Cost += (VecCost - ScalarCost); |
| 468 | return Cost; |
| 469 | } |
| 470 | case Instruction::Load: { |
| 471 | // If we are scalarize the loads, add the cost of forming the vector. |
| 472 | for (unsigned i = 0, e = VL.size()-1; i < e; ++i) |
| 473 | if (!isConsecutiveAccess(VL[i], VL[i+1])) |
| 474 | return getScalarizationCost(VecTy); |
| 475 | |
| 476 | // Cost of wide load - cost of scalar loads. |
| 477 | int ScalarLdCost = VecTy->getNumElements() * |
| 478 | TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); |
| 479 | int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); |
| 480 | return VecLdCost - ScalarLdCost; |
| 481 | } |
| 482 | case Instruction::Store: { |
| 483 | // We know that we can merge the stores. Calculate the cost. |
| 484 | int ScalarStCost = VecTy->getNumElements() * |
| 485 | TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); |
| 486 | int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); |
| 487 | int StoreCost = VecStCost - ScalarStCost; |
| 488 | |
| 489 | ValueList Operands; |
| 490 | for (unsigned j = 0; j < VL.size(); ++j) { |
| 491 | Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); |
| 492 | MemBarrierIgnoreList.insert(VL[j]); |
| 493 | } |
| 494 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 495 | int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 496 | return TotalCost; |
| 497 | } |
| 498 | default: |
| 499 | // Unable to vectorize unknown instructions. |
| 500 | return getScalarizationCost(VecTy); |
| 501 | } |
| 502 | } |
| 503 | |
| 504 | Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) { |
| 505 | int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; |
| 506 | for (unsigned i = 0; i < VF; ++i ) |
| 507 | MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); |
| 508 | return InstrVec[MaxIdx + 1]; |
| 509 | } |
| 510 | |
| 511 | Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) { |
| 512 | IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements())); |
| 513 | Value *Vec = UndefValue::get(Ty); |
Nadav Rotem | b9116e6 | 2013-04-15 22:00:26 +0000 | [diff] [blame] | 514 | for (unsigned i=0; i < Ty->getNumElements(); ++i) { |
| 515 | // Generate the 'InsertElement' instruction. |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 516 | Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); |
Nadav Rotem | b9116e6 | 2013-04-15 22:00:26 +0000 | [diff] [blame] | 517 | // Remember that this instruction is used as part of a 'gather' sequence. |
| 518 | // The caller of the bottom-up slp vectorizer can try to hoist the sequence |
| 519 | // if the users are outside of the basic block. |
| 520 | GatherInstructions.push_back(Vec); |
| 521 | } |
| 522 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 523 | return Vec; |
| 524 | } |
| 525 | |
| 526 | Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) { |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 527 | Value *V = vectorizeTree_rec(VL, VF); |
| 528 | // We moved some instructions around. We have to number them again |
| 529 | // before we can do any analysis. |
| 530 | numberInstructions(); |
| 531 | MustScalarize.clear(); |
| 532 | return V; |
| 533 | } |
| 534 | |
| 535 | Value *BoUpSLP::vectorizeTree_rec(ValueList &VL, int VF) { |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 536 | Type *ScalarTy = VL[0]->getType(); |
| 537 | if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) |
| 538 | ScalarTy = SI->getValueOperand()->getType(); |
| 539 | VectorType *VecTy = VectorType::get(ScalarTy, VF); |
| 540 | |
| 541 | // Check if all of the operands are constants or identical. |
| 542 | bool AllConst = true; |
| 543 | bool AllSameScalar = true; |
| 544 | for (unsigned i = 0, e = VF; i < e; ++i) { |
| 545 | AllConst &= !!dyn_cast<Constant>(VL[i]); |
| 546 | AllSameScalar &= (VL[0] == VL[i]); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 547 | // The instruction must be in the same BB, and it must be vectorizable. |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 548 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 549 | if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 550 | return Scalarize(VL, VecTy); |
| 551 | } |
| 552 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 553 | // Check that this is a simple vector constant. |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 554 | if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); |
| 555 | |
| 556 | // Scalarize unknown structures. |
| 557 | Instruction *VL0 = dyn_cast<Instruction>(VL[0]); |
| 558 | if (!VL0) return Scalarize(VL, VecTy); |
| 559 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 560 | if (VectorizedValues.count(VL0)) return VectorizedValues[VL0]; |
| 561 | |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 562 | unsigned Opcode = VL0->getOpcode(); |
| 563 | for (unsigned i = 0, e = VF; i < e; ++i) { |
| 564 | Instruction *I = dyn_cast<Instruction>(VL[i]); |
| 565 | // If not all of the instructions are identical then we have to scalarize. |
| 566 | if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); |
| 567 | } |
| 568 | |
| 569 | switch (Opcode) { |
| 570 | case Instruction::Add: |
| 571 | case Instruction::FAdd: |
| 572 | case Instruction::Sub: |
| 573 | case Instruction::FSub: |
| 574 | case Instruction::Mul: |
| 575 | case Instruction::FMul: |
| 576 | case Instruction::UDiv: |
| 577 | case Instruction::SDiv: |
| 578 | case Instruction::FDiv: |
| 579 | case Instruction::URem: |
| 580 | case Instruction::SRem: |
| 581 | case Instruction::FRem: |
| 582 | case Instruction::Shl: |
| 583 | case Instruction::LShr: |
| 584 | case Instruction::AShr: |
| 585 | case Instruction::And: |
| 586 | case Instruction::Or: |
| 587 | case Instruction::Xor: { |
| 588 | ValueList LHSVL, RHSVL; |
| 589 | for (int i = 0; i < VF; ++i) { |
| 590 | RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); |
| 591 | LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); |
| 592 | } |
| 593 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 594 | Value *RHS = vectorizeTree_rec(RHSVL, VF); |
| 595 | Value *LHS = vectorizeTree_rec(LHSVL, VF); |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 596 | IRBuilder<> Builder(GetLastInstr(VL, VF)); |
| 597 | BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 598 | Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); |
| 599 | VectorizedValues[VL0] = V; |
| 600 | return V; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 601 | } |
| 602 | case Instruction::Load: { |
| 603 | LoadInst *LI = dyn_cast<LoadInst>(VL0); |
| 604 | unsigned Alignment = LI->getAlignment(); |
| 605 | |
| 606 | // Check if all of the loads are consecutive. |
| 607 | for (unsigned i = 1, e = VF; i < e; ++i) |
| 608 | if (!isConsecutiveAccess(VL[i-1], VL[i])) |
| 609 | return Scalarize(VL, VecTy); |
| 610 | |
| 611 | IRBuilder<> Builder(GetLastInstr(VL, VF)); |
| 612 | Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), |
| 613 | VecTy->getPointerTo()); |
| 614 | LI = Builder.CreateLoad(VecPtr); |
| 615 | LI->setAlignment(Alignment); |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 616 | VectorizedValues[VL0] = LI; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 617 | return LI; |
| 618 | } |
| 619 | case Instruction::Store: { |
| 620 | StoreInst *SI = dyn_cast<StoreInst>(VL0); |
| 621 | unsigned Alignment = SI->getAlignment(); |
| 622 | |
| 623 | ValueList ValueOp; |
| 624 | for (int i = 0; i < VF; ++i) |
| 625 | ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); |
| 626 | |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 627 | Value *VecValue = vectorizeTree_rec(ValueOp, VF); |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 628 | |
| 629 | IRBuilder<> Builder(GetLastInstr(VL, VF)); |
| 630 | Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), |
| 631 | VecTy->getPointerTo()); |
| 632 | Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); |
| 633 | |
| 634 | for (int i = 0; i < VF; ++i) |
| 635 | cast<Instruction>(VL[i])->eraseFromParent(); |
| 636 | return 0; |
| 637 | } |
| 638 | default: |
Nadav Rotem | 8543ba3 | 2013-04-12 21:16:54 +0000 | [diff] [blame] | 639 | Value *S = Scalarize(VL, VecTy); |
| 640 | VectorizedValues[VL0] = S; |
| 641 | return S; |
Nadav Rotem | 2d9dec3 | 2013-04-09 19:44:35 +0000 | [diff] [blame] | 642 | } |
| 643 | } |
| 644 | |
| 645 | } // end of namespace |