//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "SLP"

#include "VecUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <map>

using namespace llvm;

static const unsigned MinVecRegSize = 128;

static const unsigned RecursionMaxDepth = 6;

namespace llvm {

BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
                 TargetTransformInfo *Tti, AliasAnalysis *Aa) :
  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
  numberInstructions();
}

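// Assign each instruction in the basic block a sequential index. This lets
// us compare the relative order of two instructions, and find the latest
// instruction in a bundle, with simple map and array lookups.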
void BoUpSLP::numberInstructions() {
  int Loc = 0;
  InstrIdx.clear();
  InstrVec.clear();
  // Number the instructions in the block.
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    InstrIdx[it] = Loc++;
    InstrVec.push_back(it);
    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
  }
}

Value *BoUpSLP::getPointerOperand(Value *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
  return 0;
}

unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
  return -1;
}

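// \returns true if the memory access B starts exactly where A ends, i.e. if
// the two accesses are consecutive in memory. The distance between the two
// pointers is computed with ScalarEvolution, so this also handles pointers
// built from non-trivial GEP expressions.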
bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB)) return false;

  // Check that A and B are of the same type.
  if (PtrA->getType() != PtrB->getType()) return false;

  // Calculate the distance.
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);

  // Non-constant distance.
  if (!ConstOffSCEV) return false;

  unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  // The instructions are consecutive if the size of the first load/store is
  // the same as the offset.
  unsigned Sz = DL->getTypeStoreSize(Ty);
  return ((-Offset) == Sz);
}

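// Try to vectorize a chain of consecutive stores. We slide a window of VF
// stores down the chain, where VF is the number of elements of the stored
// type that fit in the minimum vector register, and vectorize every window
// whose estimated cost is below the threshold.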
bool BoUpSLP::vectorizeStoreChain(ValueList &Chain, int CostThreshold) {
  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
  unsigned VF = MinVecRegSize / Sz;

  if (!isPowerOf2_32(Sz) || VF < 2) return false;

  bool Changed = false;
  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
    if (i + VF > e) return Changed;
    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n");
    ValueList Operands(&Chain[i], &Chain[i] + VF);

    int Cost = getTreeCost(Operands);
    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
    if (Cost < CostThreshold) {
      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
      vectorizeTree(Operands, VF);
      // Skip the stores that we just vectorized; the loop increment moves us
      // past the last element of the window.
      i += VF - 1;
      Changed = true;
    }
  }

  return Changed;
}

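// Vectorize a collection of store instructions. We first pair up stores that
// access consecutive memory locations, then walk each maximal chain from its
// head and hand it off to vectorizeStoreChain.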
bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
  ValueSet Heads, Tails;
  SmallDenseMap<Value*, Value*> ConsecutiveChain;

  // We may run into multiple chains that merge into a single chain. We mark
  // the stores that we vectorized so that we don't visit the same store twice.
  ValueSet VectorizedStores;
  bool Changed = false;

  // Do a quadratic search on all of the given stores and find
  // all of the pairs of stores that follow each other.
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
    for (unsigned j = 0; j < e; ++j) {
      if (i == j) continue;
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
        Tails.insert(Stores[j]);
        Heads.insert(Stores[i]);
        ConsecutiveChain[Stores[i]] = Stores[j];
      }
    }

  // For stores that start but don't end a link in the chain:
  for (ValueSet::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) {
    if (Tails.count(*it)) continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to vectorize it.
    ValueList Operands;
    Value *I = *it;
    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (VectorizedStores.count(I)) break;
      Operands.push_back(I);
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
    if (Vectorized) VectorizedStores.insert(Operands.begin(), Operands.end());
    Changed |= Vectorized;
  }

  return Changed;
}

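// \returns the cost of building a vector of type Ty one scalar at a time,
// modeled as the sum of the insertelement costs for each lane.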
int BoUpSLP::getScalarizationCost(Type *Ty) {
  int Cost = 0;
  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
  return Cost;
}

AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
  return AliasAnalysis::Location();
}

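// Check whether it is safe to sink Src down to Dst within the same basic
// block. \returns the first instruction between them that may alias the
// memory that Src touches, or null if sinking is safe.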
Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
  BasicBlock::iterator I = Src, E = Dst;
  // Scan all of the instructions from SRC to DST and check if
  // the source may alias.
  for (++I; I != E; ++I) {
    // Ignore store instructions that are marked as 'ignore'.
    if (MemBarrierIgnoreList.count(I)) continue;
    if (Src->mayWriteToMemory()) /* Write */ {
      if (!I->mayReadOrWriteMemory()) continue;
    } else /* Read */ {
      if (!I->mayWriteToMemory()) continue;
    }
    AliasAnalysis::Location A = getLocation(&*I);
    AliasAnalysis::Location B = getLocation(Src);

    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
      return I;
  }
  return 0;
}

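// Estimate the cost of vectorizing the tree rooted at the bundle VL. This
// runs in two phases: getTreeUses_rec first records which value feeds which
// lane and flags values that must stay scalar, then getTreeCost_rec computes
// the actual vector-versus-scalar cost.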
int BoUpSLP::getTreeCost(ValueList &VL) {
  // Clear the state that is left over from costing the previous tree: the
  // stores that we ignore as memory barriers, the lane map, and the lists of
  // instructions with multiple users.
  MemBarrierIgnoreList.clear();
  LaneMap.clear();
  MultiUserVals.clear();
  MustScalarize.clear();

  // Scan the tree and find which value is used by which lane, and which values
  // must be scalarized.
  getTreeUses_rec(VL, 0);

  // Check that instructions with multiple users can be vectorized. Mark unsafe
  // instructions.
  for (ValueSet::iterator it = MultiUserVals.begin(),
       e = MultiUserVals.end(); it != e; ++it) {
    // Check that all of the users of this instr are within the tree
    // and that they are all from the same lane.
    int Lane = -1;
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
         I != E; ++I) {
      if (LaneMap.find(*I) == LaneMap.end()) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because of an out of tree usage.\n");
        break;
      }
      if (Lane == -1) Lane = LaneMap[*I];
      if (Lane != LaneMap[*I]) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because multiple lanes use it: "
              << Lane << " and " << LaneMap[*I] << ".\n");
        break;
      }
    }
  }

  // Now calculate the cost of vectorizing the tree.
  return getTreeCost_rec(VL, 0);
}

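// Walk the tree rooted at the bundle VL and record, for every value that we
// would vectorize, which lane it belongs to (LaneMap), and which instructions
// have more than one use (MultiUserVals) so that getTreeCost can verify that
// all of their users live inside the tree and in a single lane.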
void BoUpSLP::getTreeUses_rec(ValueList &VL, unsigned Depth) {
  if (Depth == RecursionMaxDepth) return;

  // Don't handle vectors.
  if (VL[0]->getType()->isVectorTy()) return;
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    if (SI->getValueOperand()->getType()->isVectorTy()) return;

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If one of the instructions is out of this BB, we need to scalarize all.
    if (I && I->getParent() != BB) return;
  }

  // If all of the operands are identical or constant we have a simple solution.
  if (AllConst || AllSameScalar) return;

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return;

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return;
  }

  // Mark instructions with multiple users.
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Remember to check if all of the users of this instr are vectorized
    // within our tree.
    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
  }

  for (int i = 0, e = VL.size(); i < e; ++i) {
    // Check that the instruction is only used within
    // one lane.
    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
    // Mark this instruction as 'seen' and remember the lane.
    LaneMap[VL[i]] = i;
  }

  switch (Opcode) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
        ValueList Operands;
        // Prepare the operand vector.
        for (unsigned j = 0; j < VL.size(); ++j)
          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

        getTreeUses_rec(Operands, Depth+1);
      }
      // Return explicitly so that we don't fall through into the store case.
      return;
    }
    case Instruction::Store: {
      ValueList Operands;
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      getTreeUses_rec(Operands, Depth+1);
      return;
    }
    default:
      return;
  }
}

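// Recursively compute the cost of vectorizing the bundle VL. Negative values
// mean that the vector form is cheaper than the scalar form. Bundles that
// cannot be vectorized in place are priced as a full scalarization (gather),
// and max_cost is returned when vectorization is impossible altogether.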
int BoUpSLP::getTreeCost_rec(ValueList &VL, unsigned Depth) {
  Type *ScalarTy = VL[0]->getType();

  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  // Don't mess with vectors.
  if (ScalarTy->isVectorTy()) return max_cost;
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());

  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Scalarize if the instruction is outside this basic block or is a known
    // hazard.
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return getScalarizationCost(VecTy);
  }

  // A simple vector constant costs nothing.
  if (AllConst) return 0;

  // If all of the operands are identical we can broadcast them.
  if (AllSameScalar)
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return getScalarizationCost(VecTy);
  assert(VL0->getParent() == BB && "Wrong BB");

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
  }

  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    int MaxIdx = InstrIdx[VL0];
    for (unsigned i = 1, e = VL.size(); i < e; ++i)
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);

    Instruction *Last = InstrVec[MaxIdx];
    for (unsigned i = 0, e = VL.size(); i < e; ++i) {
      if (VL[i] == Last) continue;
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
      if (Barrier) {
        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
              *Last << "\n because of " << *Barrier << "\n");
        return max_cost;
      }
    }
  }

  switch (Opcode) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      int Cost = 0;
      // Calculate the cost of all of the operands.
      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
        ValueList Operands;
        // Prepare the operand vector.
        for (unsigned j = 0; j < VL.size(); ++j)
          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

        Cost += getTreeCost_rec(Operands, Depth+1);
        if (Cost >= max_cost) return max_cost;
      }

      // Calculate the cost of this instruction.
      int ScalarCost = VecTy->getNumElements() *
        TTI->getArithmeticInstrCost(Opcode, ScalarTy);

      int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
      Cost += (VecCost - ScalarCost);
      return Cost;
    }
    case Instruction::Load: {
      // If the loads are not consecutive then we have to scalarize them and
      // pay the cost of gathering the elements into a vector.
      for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
        if (!isConsecutiveAccess(VL[i], VL[i+1]))
          return getScalarizationCost(VecTy);

      // Cost of wide load - cost of scalar loads.
      int ScalarLdCost = VecTy->getNumElements() *
        TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
      int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
      return VecLdCost - ScalarLdCost;
    }
    case Instruction::Store: {
      // We know that we can merge the stores. Calculate the cost.
      int ScalarStCost = VecTy->getNumElements() *
        TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
      int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
      int StoreCost = VecStCost - ScalarStCost;

      ValueList Operands;
      for (unsigned j = 0; j < VL.size(); ++j) {
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
        MemBarrierIgnoreList.insert(VL[j]);
      }

      int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
      return TotalCost;
    }
    default:
      // Unable to vectorize unknown instructions.
      return getScalarizationCost(VecTy);
  }
}

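// Find the instruction that follows the last (bottom-most) member of the
// bundle in the block; this is where new vector instructions are inserted.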
Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
  for (unsigned i = 0; i < VF; ++i)
    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
  return InstrVec[MaxIdx + 1];
}

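// Build a vector out of the scalar values in VL with a chain of
// insertelement instructions, starting from an undef vector.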
Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
  Value *Vec = UndefValue::get(Ty);
  for (unsigned i = 0; i < Ty->getNumElements(); ++i)
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
  return Vec;
}

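// Vectorize the tree rooted at VL. This is a thin wrapper around
// vectorizeTree_rec that renumbers the instructions afterwards, because
// vectorization moves instructions around and invalidates the numbering.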
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
  Value *V = vectorizeTree_rec(VL, VF);
  // We moved some instructions around. We have to number them again
  // before we can do any analysis.
  numberInstructions();
  MustScalarize.clear();
  return V;
}

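// Recursively emit vector code for the bundle VL. Bundles that were costed
// as scalarized (constants, broadcasts, non-instructions, and known hazards)
// are built with insertelements; roots that were already vectorized are
// reused through the VectorizedValues map.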
Value *BoUpSLP::vectorizeTree_rec(ValueList &VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // The instruction must be in the same BB, and it must be vectorizable.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Gather simple vector constants and broadcasts with insertelements.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      ValueList LHSVL, RHSVL;
      for (int i = 0; i < VF; ++i) {
        LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
        RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
      }

      Value *LHS = vectorizeTree_rec(LHSVL, VF);
      Value *RHS = vectorizeTree_rec(RHSVL, VF);
      IRBuilder<> Builder(GetLastInstr(VL, VF));
      BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
      VectorizedValues[VL0] = V;
      return V;
    }
    case Instruction::Load: {
      LoadInst *LI = dyn_cast<LoadInst>(VL0);
      unsigned Alignment = LI->getAlignment();

      // Check if all of the loads are consecutive.
      for (unsigned i = 1, e = VF; i < e; ++i)
        if (!isConsecutiveAccess(VL[i-1], VL[i]))
          return Scalarize(VL, VecTy);

      IRBuilder<> Builder(GetLastInstr(VL, VF));
      Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                            VecTy->getPointerTo());
      LI = Builder.CreateLoad(VecPtr);
      LI->setAlignment(Alignment);
      VectorizedValues[VL0] = LI;
      return LI;
    }
    case Instruction::Store: {
      StoreInst *SI = dyn_cast<StoreInst>(VL0);
      unsigned Alignment = SI->getAlignment();

      ValueList ValueOp;
      for (int i = 0; i < VF; ++i)
        ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

      Value *VecValue = vectorizeTree_rec(ValueOp, VF);

      IRBuilder<> Builder(GetLastInstr(VL, VF));
      Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                            VecTy->getPointerTo());
      Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

      for (int i = 0; i < VF; ++i)
        cast<Instruction>(VL[i])->eraseFromParent();
      return 0;
    }
    default: {
      Value *S = Scalarize(VL, VecTy);
      VectorizedValues[VL0] = S;
      return S;
    }
  }
}

} // end of namespace llvm