//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/SmallPtrSet.h"   // SmallPtrSet is used directly below.
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Compiler.h"
using namespace llvm;

namespace {
  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
  public:
    CodeGenPrepare(const TargetLowering *tli = 0) : TLI(tli) {}
    bool runOnFunction(Function &F);

  private:
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeGEPExpression(GetElementPtrInst *GEPI);
  };
}
static RegisterPass<CodeGenPrepare> X("codegenprepare",
                                      "Optimize for code generation");

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  // First pass, eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
      MadeChange |= OptimizeBlock(*BB);
    EverMadeChange |= MadeChange;
  }
  return EverMadeChange;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes
/// and an unconditional branch.  Passes before isel (e.g. LSR/loopsimplify)
/// often split edges in ways that are non-optimal for isel.  Start by
/// eliminating these blocks so we can split them the way we want them.
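///
/// As an illustrative sketch (block and value names are invented here, not
/// taken from any test case), a candidate block looks roughly like:
///
///   bb:                                   ; preds = %entry, %loop
///     %p = phi i32 [ 0, %entry ], [ %x, %loop ]
///     br label %dest
///
/// and is removed by merging its phis into %dest once CanMergeBlocks (below)
/// reports that no incoming values would conflict.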
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch isn't a phi node, then other stuff
    // is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      if (!isa<PHINode>(BBI)) continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
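///
/// A hedged sketch of the conflict this rejects (names are invented for
/// illustration): if %pred branches to both %bb and %dest, and %dest contains
///
///   %v = phi i32 [ 1, %pred ], [ 2, %bb ]
///
/// then folding %bb into %dest would need the value coming in from %pred to
/// be 1 and 2 at once, so the merge is refused.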
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB;

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (DestBB->getSinglePredecessor()) {
    // If DestBB has single-entry PHI nodes, fold them.
    while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
      PN->replaceAllUsesWith(PN->getIncomingValue(0));
      PN->eraseFromParent();
    }

    // Splice all the PHI nodes from BB over to DestBB.
    DestBB->getInstList().splice(DestBB->begin(), BB->getInstList(),
                                 BB->begin(), BI);

    // Anything that branched to BB now branches to DestBB.
    BB->replaceAllUsesWith(DestBB);

    // Nuke BB.
    BB->eraseFromParent();

    DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
    return;
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();

  DOUT << "AFTER:\n" << *DestBB << "\n\n\n";
}


/// SplitEdgeNicely - Split the critical edge from TI to its specified
/// successor if it will improve codegen.  We only do this if the successor has
/// phi nodes (otherwise critical edges are ok).  If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
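///
/// Rough sketch of the reuse case (block names are invented): if some %other
/// block already consists of nothing but "br label %succ", and the phis in
/// %succ would see the same incoming values from %other as they would from
/// the edge being split, the terminator's edge is simply retargeted at %other
/// rather than materializing a brand-new split block.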
static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
  BasicBlock *TIBB = TI->getParent();
  BasicBlock *Dest = TI->getSuccessor(SuccNum);
  assert(isa<PHINode>(Dest->begin()) &&
         "This should only be called if Dest has a PHI!");

  /// TIPHIValues - This array is lazily computed to determine the values of
  /// PHIs in Dest that TI would provide.
  std::vector<Value*> TIPHIValues;

  // Check to see if Dest has any blocks that can be used as a split edge for
  // this terminator.
  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    // To be usable, the pred has to end with an uncond branch to the dest.
    BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
    if (!PredBr || !PredBr->isUnconditional() ||
        // Must be empty other than the branch.
        &Pred->front() != PredBr)
      continue;

    // Finally, since we know that Dest has phi nodes in it, we have to make
    // sure that jumping to Pred will have the same effect as going to Dest in
    // terms of PHI values.
    PHINode *PN;
    unsigned PHINo = 0;
    bool FoundMatch = true;
    for (BasicBlock::iterator I = Dest->begin();
         (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
      if (PHINo == TIPHIValues.size())
        TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));

      // If the PHI entry doesn't work, we can't use this pred.
      if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
        FoundMatch = false;
        break;
      }
    }

    // If we found a workable predecessor, change TI to branch to Succ.
    if (FoundMatch) {
      Dest->removePredecessor(TIBB);
      TI->setSuccessor(SuccNum, Pred);
      return;
    }
  }

  SplitCriticalEdge(TI, SuccNum, P, true);
}


/// InsertGEPComputeCode - Insert code into BB to compute Ptr+PtrOffset,
/// casting to the type of GEPI.
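///
/// The emitted sequence is, roughly (value names invented; the integer type
/// is the target's pointer-sized integer from TargetData):
///
///   %sum  = add %Ptr, %PtrOffset
///   %addr = inttoptr %sum to <type of GEPI>
///
/// placed either immediately after the GEP or at the top of BB after any phi
/// nodes, depending on which block the code is requested in.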
static Instruction *InsertGEPComputeCode(Instruction *&V, BasicBlock *BB,
                                         Instruction *GEPI, Value *Ptr,
                                         Value *PtrOffset) {
  if (V) return V;   // Already computed.

  // Figure out the insertion point
  BasicBlock::iterator InsertPt;
  if (BB == GEPI->getParent()) {
    // If GEP is already inserted into BB, insert right after the GEP.
    InsertPt = GEPI;
    ++InsertPt;
  } else {
    // Otherwise, insert at the top of BB, after any PHI nodes
    InsertPt = BB->begin();
    while (isa<PHINode>(InsertPt)) ++InsertPt;
  }

  // If Ptr is itself a cast, but in some other BB, emit a copy of the cast into
  // BB so that there is only one value live across basic blocks (the cast
  // operand).
  if (CastInst *CI = dyn_cast<CastInst>(Ptr))
    if (CI->getParent() != BB && isa<PointerType>(CI->getOperand(0)->getType()))
      Ptr = CastInst::create(CI->getOpcode(), CI->getOperand(0), CI->getType(),
                             "", InsertPt);

  // Add the offset, cast it to the right type.
  Ptr = BinaryOperator::createAdd(Ptr, PtrOffset, "", InsertPt);
  // Ptr is an integer type, GEPI is pointer type ==> IntToPtr
  return V = CastInst::create(Instruction::IntToPtr, Ptr, GEPI->getType(),
                              "", InsertPt);
}

/// ReplaceUsesOfGEPInst - Replace all uses of RepPtr with inserted code to
/// compute its value.  The RepPtr value can be computed with Ptr+PtrOffset.  One
/// trivial way of doing this would be to evaluate Ptr+PtrOffset in RepPtr's
/// block, then ReplaceAllUsesWith'ing everything.  However, we would prefer to
/// sink PtrOffset into user blocks where doing so will likely allow us to fold
/// the constant add into a load or store instruction.  Additionally, if a user
/// is a pointer-pointer cast, we look through it to find its users.
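///
/// Hypothetical before/after sketch (all names and types invented): a use in
/// another block such as
///
///   %x = load i32* %RepPtr
///
/// becomes, with the constant add emitted next to the load so isel can fold
/// it into the addressing mode,
///
///   %sum = add %Ptr, %PtrOffset
///   %p2  = inttoptr %sum to i32*
///   %x   = load i32* %p2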
static void ReplaceUsesOfGEPInst(Instruction *RepPtr, Value *Ptr,
                                 Constant *PtrOffset, BasicBlock *DefBB,
                                 GetElementPtrInst *GEPI,
                                 std::map<BasicBlock*,Instruction*> &InsertedExprs) {
  while (!RepPtr->use_empty()) {
    Instruction *User = cast<Instruction>(RepPtr->use_back());

    // If the user is a Pointer-Pointer cast, recurse.  Only BitCast can be
    // used for a Pointer-Pointer cast.
    if (isa<BitCastInst>(User)) {
      ReplaceUsesOfGEPInst(User, Ptr, PtrOffset, DefBB, GEPI, InsertedExprs);

      // Drop the use of RepPtr.  The cast is dead.  Don't delete it now, else we
      // could invalidate an iterator.
      User->setOperand(0, UndefValue::get(RepPtr->getType()));
      continue;
    }

    // If this is a load of the pointer, or a store through the pointer, emit
    // the increment into the load/store block.
    Instruction *NewVal;
    if (isa<LoadInst>(User) ||
        (isa<StoreInst>(User) && User->getOperand(0) != RepPtr)) {
      NewVal = InsertGEPComputeCode(InsertedExprs[User->getParent()],
                                    User->getParent(), GEPI,
                                    Ptr, PtrOffset);
    } else {
      // If this use is not foldable into the addressing mode, use a version
      // emitted in the GEP block.
      NewVal = InsertGEPComputeCode(InsertedExprs[DefBB], DefBB, GEPI,
                                    Ptr, PtrOffset);
    }

    if (GEPI->getType() != RepPtr->getType()) {
      BasicBlock::iterator IP = NewVal;
      ++IP;
      // NewVal has the GEP's pointer type, so converting it to RepPtr's
      // (also pointer) type is a BitCast.
      NewVal = new BitCastInst(NewVal, RepPtr->getType(), "", IP);
    }
    User->replaceUsesOfWith(RepPtr, NewVal);
  }
}

/// OptimizeGEPExpression - Since we are doing basic-block-at-a-time instruction
/// selection, we want to be a bit careful about some things.  In particular, if
/// we have a GEP instruction that is used in a different block than it is
/// defined, the addressing expression of the GEP cannot be folded into loads or
/// stores that use it.  In this case, decompose the GEP and move constant
/// indices into blocks that use it.
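///
/// Illustrative decomposition sketch (struct layout, names, and offsets are
/// invented): a GEP such as
///
///   %gep = getelementptr %struct.S* %base, i32 0, i32 2, i32 %i
///
/// that is used in another block gets its variable part emitted once in its
/// own block,
///
///   %ptr = ptrtoint %struct.S* %base to <intptr type>
///   %mul = mul <sext of %i>, <element size>
///   %sum = add %ptr, %mul
///
/// while a load or store user gets its own "add %sum, <constant offset>" plus
/// an inttoptr emitted in its block, so the constant can fold into the
/// addressing mode (other users share one copy next to the GEP).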
bool CodeGenPrepare::OptimizeGEPExpression(GetElementPtrInst *GEPI) {
  // If this GEP is only used inside the block it is defined in, there is no
  // need to rewrite it.
  bool isUsedOutsideDefBB = false;
  BasicBlock *DefBB = GEPI->getParent();
  for (Value::use_iterator UI = GEPI->use_begin(), E = GEPI->use_end();
       UI != E; ++UI) {
    if (cast<Instruction>(*UI)->getParent() != DefBB) {
      isUsedOutsideDefBB = true;
      break;
    }
  }
  if (!isUsedOutsideDefBB) return false;

  // If this GEP has no non-zero constant indices, there is nothing we can do,
  // ignore it.
  bool hasConstantIndex = false;
  bool hasVariableIndex = false;
  for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
       E = GEPI->op_end(); OI != E; ++OI) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(*OI)) {
      if (!CI->isZero()) {
        hasConstantIndex = true;
        break;
      }
    } else {
      hasVariableIndex = true;
    }
  }

  // If this is a "GEP X, 0, 0, 0", turn this into a cast.
  if (!hasConstantIndex && !hasVariableIndex) {
    /// The GEP operand must be a pointer, so must its result -> BitCast
    Value *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NC);
    GEPI->eraseFromParent();
    return true;
  }

  // If this is a GEP &Alloca, 0, 0, forward subst the frame index into uses.
  if (!hasConstantIndex && !isa<AllocaInst>(GEPI->getOperand(0)))
    return false;

  // If we don't have target lowering info, we can't lower the GEP.
  if (!TLI) return false;
  const TargetData *TD = TLI->getTargetData();

  // Otherwise, decompose the GEP instruction into multiplies and adds.  Sum the
  // constant offset (which we now know is non-zero) and deal with it later.
  uint64_t ConstantOffset = 0;
  const Type *UIntPtrTy = TD->getIntPtrType();
  Value *Ptr = new PtrToIntInst(GEPI->getOperand(0), UIntPtrTy, "", GEPI);
  const Type *Ty = GEPI->getOperand(0)->getType();

  for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
       E = GEPI->op_end(); OI != E; ++OI) {
    Value *Idx = *OI;
    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      if (Field)
        ConstantOffset += TD->getStructLayout(StTy)->getElementOffset(Field);
      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();

      // Handle constant subscripts.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->getZExtValue() == 0) continue;
        ConstantOffset += (int64_t)TD->getTypeSize(Ty)*CI->getSExtValue();
        continue;
      }

      // Ptr = Ptr + Idx * ElementSize;

      // Cast Idx to UIntPtrTy if needed.
      Idx = CastInst::createIntegerCast(Idx, UIntPtrTy, true/*SExt*/, "", GEPI);

      uint64_t ElementSize = TD->getTypeSize(Ty);
      // Mask off bits that should not be set.
      ElementSize &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
      Constant *SizeCst = ConstantInt::get(UIntPtrTy, ElementSize);

      // Multiply by the element size and add to the base.
      Idx = BinaryOperator::createMul(Idx, SizeCst, "", GEPI);
      Ptr = BinaryOperator::createAdd(Ptr, Idx, "", GEPI);
    }
  }

  // Make sure that the offset fits in uintptr_t.
  ConstantOffset &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
  Constant *PtrOffset = ConstantInt::get(UIntPtrTy, ConstantOffset);

  // Okay, we have now emitted all of the variable index parts to the BB that
  // the GEP is defined in.  Loop over all of the using instructions, inserting
  // an "add Ptr, ConstantOffset" into each block that uses it and update the
  // instruction to use the newly computed value, making GEPI dead.  When the
  // user is a load or store instruction address, we emit the add into the user
  // block, otherwise we use a canonical version right next to the gep (these
  // won't be foldable as addresses, so we might as well share the computation).

  std::map<BasicBlock*,Instruction*> InsertedExprs;
  ReplaceUsesOfGEPInst(GEPI, Ptr, PtrOffset, DefBB, GEPI, InsertedExprs);

  // Finally, the GEP is dead, remove it.
  GEPI->eraseFromParent();

  return true;
}

/// SinkInvariantGEPIndex - If a GEP instruction has a variable index that has
/// been hoisted out of the loop by LICM pass, sink it back into the use BB
/// if it can be determined that the index computation can be folded into the
/// addressing mode of the load / store uses.
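///
/// Hedged sketch (names invented): if LICM hoisted "%idx = add i32 %x, 42"
/// out of a loop, but %idx is only used by a GEP in the loop body whose
/// load/store users could absorb the resulting constant displacement into
/// their addressing mode, a copy of the add is re-emitted in the GEP's block
/// and the uses there are redirected to it; the hoisted original is erased
/// once it has no remaining uses.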
static bool SinkInvariantGEPIndex(BinaryOperator *BinOp,
                                  const TargetLowering &TLI) {
  // Only look at Add.
  if (BinOp->getOpcode() != Instruction::Add)
    return false;

  // DestBBs - These are the blocks where a copy of BinOp will be inserted.
  SmallSet<BasicBlock*, 8> DestBBs;
  BasicBlock *DefBB = BinOp->getParent();
  bool MadeChange = false;
  for (Value::use_iterator UI = BinOp->use_begin(), E = BinOp->use_end();
       UI != E; ++UI) {
    Instruction *GEPI = cast<Instruction>(*UI);
    // Only look for GEP use in another block.
    if (GEPI->getParent() == DefBB) continue;

    if (isa<GetElementPtrInst>(GEPI)) {
      // If the GEP has another variable index, abandon.
      bool hasVariableIndex = false;
      for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
           OE = GEPI->op_end(); OI != OE; ++OI)
        if (*OI != BinOp && !isa<ConstantInt>(*OI)) {
          hasVariableIndex = true;
          break;
        }
      if (hasVariableIndex)
        break;

      BasicBlock *GEPIBB = GEPI->getParent();
      for (Value::use_iterator UUI = GEPI->use_begin(), UE = GEPI->use_end();
           UUI != UE; ++UUI) {
        Instruction *GEPIUser = cast<Instruction>(*UUI);
        const Type *UseTy = NULL;
        if (LoadInst *Load = dyn_cast<LoadInst>(GEPIUser))
          UseTy = Load->getType();
        else if (StoreInst *Store = dyn_cast<StoreInst>(GEPIUser))
          UseTy = Store->getOperand(0)->getType();

        // Check if it is possible to fold the expression to address mode.
        if (UseTy && isa<ConstantInt>(BinOp->getOperand(1))) {
          uint64_t Scale = TLI.getTargetData()->getTypeSize(UseTy);
          int64_t Cst = cast<ConstantInt>(BinOp->getOperand(1))->getSExtValue();
          // e.g. load (gep i32 * %P, (X+42)) => load (%P + X*4 + 168).
          if (TLI.isLegalAddressImmediate(Cst*Scale, UseTy) &&
              (Scale == 1 || TLI.isLegalAddressScale(Scale, UseTy))) {
            DestBBs.insert(GEPIBB);
            MadeChange = true;
            break;
          }
        }
      }
    }
  }

  // Nothing to do.
  if (!MadeChange)
    return false;

  /// InsertedOps - Only insert a duplicate in each block once.
  std::map<BasicBlock*, BinaryOperator*> InsertedOps;
  for (Value::use_iterator UI = BinOp->use_begin(), E = BinOp->use_end();
       UI != E; ) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If any user in this BB wants it, replace all the uses in the BB.
    if (DestBBs.count(UserBB)) {
      // Sink it into user block.
      BinaryOperator *&InsertedOp = InsertedOps[UserBB];
      if (!InsertedOp) {
        BasicBlock::iterator InsertPt = UserBB->begin();
        while (isa<PHINode>(InsertPt)) ++InsertPt;

        InsertedOp =
          BinaryOperator::create(BinOp->getOpcode(), BinOp->getOperand(0),
                                 BinOp->getOperand(1), "", InsertPt);
      }

      User->replaceUsesOfWith(BinOp, InsertedOp);
    }
  }

  if (BinOp->use_empty())
    BinOp->eraseFromParent();

  return true;
}

/// OptimizeNoopCopyExpression - We have determined that the specified cast
/// instruction is a noop copy (e.g. it's casting from one pointer type to
/// another, int->uint, or int->sbyte on PPC).
///
/// Return true if any changes are made.
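///
/// Illustrative sketch (names invented): given
///
///   bb1:  %c = bitcast i32* %p to i8*
///   bb2:  ... uses of %c ...
///
/// a copy of the cast is emitted at the top of bb2 (after any phi nodes) and
/// the uses there are rewritten to it, so the value no longer has to live
/// across the block boundary in its own virtual register; if every use is
/// rewritten, the original cast is erased.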
static bool OptimizeNoopCopyExpression(CastInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  std::map<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      unsigned OpVal = UI.getOperandNo()/2;
      UserBB = PN->getIncomingBlock(OpVal);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->begin();
      while (isa<PHINode>(InsertPt)) ++InsertPt;

      InsertedCast =
        CastInst::create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}



// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Split all critical edges where the dest block has a PHI and where the phi
  // has shared immediate operands.
  TerminatorInst *BBTI = BB.getTerminator();
  if (BBTI->getNumSuccessors() > 1) {
    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
      if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
          isCriticalEdge(BBTI, i, true))
        SplitEdgeNicely(BBTI, i, this);
  }


  for (BasicBlock::iterator BBI = BB.begin(), E = BB.end(); BBI != E; ) {
    Instruction *I = BBI++;

    if (CallInst *CI = dyn_cast<CallInst>(I)) {
      // If we found an inline asm expression, and if the target knows how to
      // lower it to normal LLVM code, do so now.
      if (TLI && isa<InlineAsm>(CI->getCalledValue()))
        if (const TargetAsmInfo *TAI =
              TLI->getTargetMachine().getTargetAsmInfo()) {
          if (TAI->ExpandInlineAsm(CI))
            BBI = BB.begin();
        }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      MadeChange |= OptimizeGEPExpression(GEPI);
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      // If the source of the cast is a constant, then this should have
      // already been constant folded.  The only reason NOT to constant fold
      // it is if something (e.g. LSR) was careful to place the constant
      // evaluation in a block other than the one that uses it (e.g. to hoist
      // the address of globals out of a loop).  If this is the case, we don't
      // want to forward-subst the cast.
      if (isa<Constant>(CI->getOperand(0)))
        continue;

      if (!TLI) continue;

      // If this is a noop copy, sink it into user blocks to reduce the number
      // of virtual registers that must be created and coalesced.
      MVT::ValueType SrcVT = TLI->getValueType(CI->getOperand(0)->getType());
      MVT::ValueType DstVT = TLI->getValueType(CI->getType());

      // This is an fp<->int conversion?
      if (MVT::isInteger(SrcVT) != MVT::isInteger(DstVT))
        continue;

      // If this is an extension, it will be a zero or sign extension, which
      // isn't a noop.
      if (SrcVT < DstVT) continue;

      // If these values will be promoted, find out what they will be promoted
      // to.  This helps us consider truncates on PPC as noop copies when they
      // are.
      if (TLI->getTypeAction(SrcVT) == TargetLowering::Promote)
        SrcVT = TLI->getTypeToTransformTo(SrcVT);
      if (TLI->getTypeAction(DstVT) == TargetLowering::Promote)
        DstVT = TLI->getTypeToTransformTo(DstVT);

      // If, after promotion, these are the same types, this is a noop copy.
      if (SrcVT == DstVT)
        MadeChange |= OptimizeNoopCopyExpression(CI);
    } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I)) {
      if (TLI)
        MadeChange |= SinkInvariantGEPIndex(BinOp, *TLI);
    }
  }
  return MadeChange;
}