It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
new file mode 100644
index 0000000..520cfeb
--- /dev/null
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -0,0 +1,175 @@
+//===-- BasicBlockUtils.cpp - BasicBlock Utilities -------------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constant.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Dominators.h"
+#include <algorithm>
+using namespace llvm;
+
+/// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+/// with a value, then remove and delete the original instruction.
+///
+/// On return, BI points at the instruction that followed the erased one, and
+/// V carries the erased instruction's name if V was previously unnamed.
+///
+void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+                                BasicBlock::iterator &BI, Value *V) {
+  Instruction &I = *BI;
+  // Replaces all of the uses of the instruction with uses of the value
+  I.replaceAllUsesWith(V);
+
+  // Make sure to propagate a name if there is one already.
+  if (I.hasName() && !V->hasName())
+    V->takeName(&I);
+
+  // Delete the unnecessary instruction now...
+  BI = BIL.erase(BI);
+}
+
+
+/// ReplaceInstWithInst - Replace the instruction specified by BI with the
+/// instruction specified by I.  The original instruction is deleted and BI is
+/// updated to point to the new instruction.
+///
+/// I must not already be embedded in a basic block (asserted below).
+///
+void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+                               BasicBlock::iterator &BI, Instruction *I) {
+  assert(I->getParent() == 0 &&
+         "ReplaceInstWithInst: Instruction already inserted into basic block!");
+
+  // Insert the new instruction into the basic block...
+  BasicBlock::iterator New = BIL.insert(BI, I);
+
+  // Replace all uses of the old instruction, and delete it.
+  ReplaceInstWithValue(BIL, BI, I);
+
+  // Move BI back to point to the newly inserted instruction
+  BI = New;
+}
+
+/// ReplaceInstWithInst - Replace the instruction specified by From with the
+/// instruction specified by To.
+///
+/// Convenience overload: forwards to the iterator-based form using From's
+/// position in its parent block's instruction list.
+///
+void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
+  BasicBlock::iterator BI(From);
+  ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
+}
+
+/// RemoveSuccessor - Change the specified terminator instruction such that its
+/// successor SuccNum no longer exists.  Because this reduces the outgoing
+/// degree of the current basic block, the actual terminator instruction itself
+/// may have to be changed.  In the case where the last successor of the block 
+/// is deleted, a return instruction is inserted in its place which can cause a
+/// surprising change in program behavior if it is not expected.
+///
+/// Only branch terminators are handled; invoke/switch terminators hit the
+/// assert/abort in the switch below.
+///
+void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
+  assert(SuccNum < TI->getNumSuccessors() &&
+         "Trying to remove a nonexistant successor!");
+
+  // If our old successor block contains any PHI nodes, remove the entry in the
+  // PHI nodes that comes from this branch...
+  //
+  BasicBlock *BB = TI->getParent();
+  TI->getSuccessor(SuccNum)->removePredecessor(BB);
+
+  TerminatorInst *NewTI = 0;
+  switch (TI->getOpcode()) {
+  case Instruction::Br:
+    // If this is a conditional branch... convert to unconditional branch.
+    if (TI->getNumSuccessors() == 2) {
+      cast<BranchInst>(TI)->setUnconditionalDest(TI->getSuccessor(1-SuccNum));
+    } else {                    // Otherwise convert to a return instruction...
+      Value *RetVal = 0;
+
+      // Create a value to return... if the function doesn't return null...
+      if (BB->getParent()->getReturnType() != Type::VoidTy)
+        RetVal = Constant::getNullValue(BB->getParent()->getReturnType());
+
+      // Create the return...
+      NewTI = new ReturnInst(RetVal);
+    }
+    break;
+
+  case Instruction::Invoke:    // Should convert to call
+  case Instruction::Switch:    // Should remove entry
+  default:
+  case Instruction::Ret:       // Cannot happen, has no successors!
+    assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!");
+    abort();
+  }
+
+  if (NewTI)   // If it's a different instruction, replace.
+    ReplaceInstWithInst(TI, NewTI);
+}
+
+/// SplitEdge - Split the edge connecting the specified blocks, returning the
+/// block the edge now runs through.  Pass P must not be NULL; it is used to
+/// keep analysis information (loops, dominators) up to date.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+  TerminatorInst *LatchTerm = BB->getTerminator();
+
+  // Find the index of the BB -> Succ edge.  The assert guards against being
+  // handed a pair of blocks with no edge between them.
+  unsigned SuccNum = 0;
+  for (unsigned i = 0, e = LatchTerm->getNumSuccessors(); ; ++i) {
+    assert(i != e && "Didn't find edge?");
+    if (LatchTerm->getSuccessor(i) == Succ) {
+      SuccNum = i;
+      break;
+    }
+  }
+  
+  // If this is a critical edge, let SplitCriticalEdge do it.
+  if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P))
+    return LatchTerm->getSuccessor(SuccNum);
+
+  // If the edge isn't critical, then BB has a single successor or Succ has a
+  // single pred.  Split the block.
+  if (BasicBlock *SP = Succ->getSinglePredecessor()) {
+    // If the successor only has a single pred, split the top of the successor
+    // block.
+    assert(SP == BB && "CFG broken");
+    return SplitBlock(Succ, Succ->begin(), P);
+  } else {
+    // Otherwise, if BB has a single successor, split it at the bottom of the
+    // block.
+    assert(BB->getTerminator()->getNumSuccessors() == 1 &&
+           "Should have a single succ!"); 
+    return SplitBlock(BB, BB->getTerminator(), P);
+  }
+}
+
+/// SplitBlock - Split the specified block at the specified instruction - every
+/// thing before SplitPt stays in Old and everything starting with SplitPt moves
+/// to a new block.  The two blocks are joined by an unconditional branch and
+/// the loop info is updated.
+///
+BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {
+
+  LoopInfo &LI = P->getAnalysis<LoopInfo>();
+  BasicBlock::iterator SplitIt = SplitPt;
+  while (isa<PHINode>(SplitIt))
+    ++SplitIt;
+  BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split");
+
+  // The new block lives in whichever loop the old one did.
+  if (Loop *L = LI.getLoopFor(Old))
+    L->addBasicBlockToLoop(New, LI);
+
+  if (DominatorTree *DT = P->getAnalysisToUpdate<DominatorTree>())
+    DT->addNewBlock(New, Old);
+
+  if (DominanceFrontier *DF = P->getAnalysisToUpdate<DominanceFrontier>())
+    DF->splitBlock(Old);
+    
+  return New;
+}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
new file mode 100644
index 0000000..af9a114
--- /dev/null
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -0,0 +1,269 @@
+//===- BreakCriticalEdges.cpp - Critical Edge Elimination Pass ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block.  This pass may be "required" by passes that
+// cannot deal with critical edges.  For this usage, the structure type is
+// forward declared.  This pass obviously invalidates the CFG, but can update
+// forward dominator (set, immediate dominators, tree, and frontier)
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "break-crit-edges"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumBroken, "Number of blocks inserted");
+
+namespace {
+  /// BreakCriticalEdges - FunctionPass wrapper around SplitCriticalEdge that
+  /// splits every critical edge in a function.
+  struct VISIBILITY_HIDDEN BreakCriticalEdges : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    BreakCriticalEdges() : FunctionPass((intptr_t)&ID) {}
+
+    virtual bool runOnFunction(Function &F);
+
+    // SplitCriticalEdge updates these analyses in place, so they survive
+    // this pass.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<DominatorTree>();
+      AU.addPreserved<DominanceFrontier>();
+      AU.addPreserved<LoopInfo>();
+
+      // No loop canonicalization guarantees are broken by this pass.
+      AU.addPreservedID(LoopSimplifyID);
+    }
+  };
+
+  char BreakCriticalEdges::ID = 0;
+  // Register the pass under the "break-crit-edges" command-line name.
+  RegisterPass<BreakCriticalEdges> X("break-crit-edges",
+                                    "Break critical edges in CFG");
+
+// Publicly exposed interface to pass...
+const PassInfo *llvm::BreakCriticalEdgesID = X.getPassInfo();
+// createBreakCriticalEdgesPass - Create an instance of the pass for clients
+// that schedule it explicitly.
+FunctionPass *llvm::createBreakCriticalEdgesPass() {
+  return new BreakCriticalEdges();
+}
+
+// runOnFunction - Loop over all of the edges in the CFG, breaking critical
+// edges as they are found.  Returns true if any edge was split (i.e. the CFG
+// was modified).
+//
+bool BreakCriticalEdges::runOnFunction(Function &F) {
+  bool Changed = false;
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    TerminatorInst *TI = I->getTerminator();
+    // Only terminators with multiple successors can source a critical edge.
+    if (TI->getNumSuccessors() > 1)
+      for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+        if (SplitCriticalEdge(TI, i, this)) {
+          ++NumBroken;
+          Changed = true;
+        }
+  }
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//    Implementation of the external critical edge manipulation functions
+//===----------------------------------------------------------------------===//
+
+// isCriticalEdge - Return true if the specified edge is a critical edge.
+// Critical edges are edges from a block with multiple successors to a block
+// with multiple predecessors.
+//
+// If AllowIdenticalEdges is true, duplicate edges from TI's own block do not
+// by themselves make the edge critical.
+//
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+                          bool AllowIdenticalEdges) {
+  assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+  // A single-successor block can never source a critical edge.
+  if (TI->getNumSuccessors() == 1) return false;
+
+  const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+  pred_const_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+  // If there is more than one predecessor, this is a critical edge...
+  assert(I != E && "No preds, but we have an edge to the block?");
+  const BasicBlock *FirstPred = *I;
+  ++I;        // Skip one edge due to the incoming arc from TI.
+  if (!AllowIdenticalEdges)
+    return I != E;
+  
+  // If AllowIdenticalEdges is true, then we allow this edge to be considered
+  // non-critical iff all preds come from TI's block.
+  for (; I != E; ++I)
+    if (*I != FirstPred) return true;
+  return false;
+}
+
+// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+// split the critical edge.  This will update DominatorTree, and DominanceFrontier 
+// information if it is available, thus calling this pass will not invalidate 
+// any of them.  This returns true if the edge was split, false otherwise. 
+// This ensures that all edges to that dest go to one block instead of each 
+// going to a different block.
+//
+bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,
+                             bool MergeIdenticalEdges) {
+  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false;
+  BasicBlock *TIBB = TI->getParent();
+  BasicBlock *DestBB = TI->getSuccessor(SuccNum);
+
+  // Create a new basic block, linking it into the CFG.
+  BasicBlock *NewBB = new BasicBlock(TIBB->getName() + "." +
+                                     DestBB->getName() + "_crit_edge");
+  // Create our unconditional branch...
+  new BranchInst(DestBB, NewBB);
+
+  // Branch to the new block, breaking the edge.
+  TI->setSuccessor(SuccNum, NewBB);
+
+  // Insert the block into the function... right after the block TI lives in.
+  Function &F = *TIBB->getParent();
+  Function::iterator FBBI = TIBB;
+  F.getBasicBlockList().insert(++FBBI, NewBB);
+  
+  // If there are any PHI nodes in DestBB, we need to update them so that they
+  // merge incoming values from NewBB instead of from TIBB.
+  //
+  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    // We no longer enter through TIBB, now we come in through NewBB.  Revector
+    // exactly one entry in the PHI node that used to come from TIBB to come
+    // from NewBB.
+    int BBIdx = PN->getBasicBlockIndex(TIBB);
+    PN->setIncomingBlock(BBIdx, NewBB);
+  }
+  
+  // If there are any other edges from TIBB to DestBB, update those to go
+  // through the split block, making those edges non-critical as well (and
+  // reducing the number of phi entries in the DestBB if relevant).
+  if (MergeIdenticalEdges) {
+    for (unsigned i = SuccNum+1, e = TI->getNumSuccessors(); i != e; ++i) {
+      if (TI->getSuccessor(i) != DestBB) continue;
+      
+      // Remove an entry for TIBB from DestBB phi nodes.
+      DestBB->removePredecessor(TIBB);
+      
+      // We found another edge to DestBB, go to NewBB instead.
+      TI->setSuccessor(i, NewBB);
+    }
+  }
+
+  // If we don't have a pass object, we can't update anything...
+  if (P == 0) return true;
+
+  // Now update analysis information.  Since the only predecessor of NewBB is
+  // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate
+  // anything, as there are other successors of DestBB.  However, if all other
+  // predecessors of DestBB are already dominated by DestBB (e.g. DestBB is a
+  // loop header) then NewBB dominates DestBB.
+  SmallVector<BasicBlock*, 8> OtherPreds;
+
+  for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I)
+    if (*I != NewBB)
+      OtherPreds.push_back(*I);
+  
+  bool NewBBDominatesDestBB = true;
+  
+  // Should we update DominatorTree information?
+  if (DominatorTree *DT = P->getAnalysisToUpdate<DominatorTree>()) {
+    DomTreeNode *TINode = DT->getNode(TIBB);
+
+    // The new block is not the immediate dominator for any other nodes, but
+    // TINode is the immediate dominator for the new node.
+    //
+    if (TINode) {       // Don't break unreachable code!
+      DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
+      DomTreeNode *DestBBNode = 0;
+     
+      // If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
+      if (!OtherPreds.empty()) {
+        DestBBNode = DT->getNode(DestBB);
+        while (!OtherPreds.empty() && NewBBDominatesDestBB) {
+          if (DomTreeNode *OPNode = DT->getNode(OtherPreds.back()))
+            NewBBDominatesDestBB = DT->dominates(DestBBNode, OPNode);
+          OtherPreds.pop_back();
+        }
+        OtherPreds.clear();
+      }
+      
+      // If NewBBDominatesDestBB, then NewBB dominates DestBB, otherwise it
+      // doesn't dominate anything.
+      if (NewBBDominatesDestBB) {
+        if (!DestBBNode) DestBBNode = DT->getNode(DestBB);
+        DT->changeImmediateDominator(DestBBNode, NewBBNode);
+      }
+    }
+  }
+
+  // Should we update DominanceFrontier information?
+  if (DominanceFrontier *DF = P->getAnalysisToUpdate<DominanceFrontier>()) {
+    // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.
+    if (!OtherPreds.empty()) {
+      // FIXME: IMPLEMENT THIS!
+      assert(0 && "Requiring domfrontiers but not idom/domtree/domset."
+             " not implemented yet!");
+    }
+    
+    // Since the new block is dominated by its only predecessor TIBB,
+    // it cannot be in any block's dominance frontier.  If NewBB dominates
+    // DestBB, its dominance frontier is the same as DestBB's, otherwise it is
+    // just {DestBB}.
+    if (NewBBDominatesDestBB) {
+      DominanceFrontier::iterator I = DF->find(DestBB);
+      if (I != DF->end())
+        DF->addBasicBlock(NewBB, I->second);
+      else
+        DF->addBasicBlock(NewBB, DominanceFrontier::DomSetType());
+    } else {
+      DominanceFrontier::DomSetType NewDFSet;
+      NewDFSet.insert(DestBB);
+      DF->addBasicBlock(NewBB, NewDFSet);
+    }
+  }
+  
+  // Update LoopInfo if it is around.
+  if (LoopInfo *LI = P->getAnalysisToUpdate<LoopInfo>()) {
+    // If one or the other blocks were not in a loop, the new block is not
+    // either, and thus LI doesn't need to be updated.
+    if (Loop *TIL = LI->getLoopFor(TIBB))
+      if (Loop *DestLoop = LI->getLoopFor(DestBB)) {
+        if (TIL == DestLoop) {
+          // Both in the same loop, the NewBB joins loop.
+          DestLoop->addBasicBlockToLoop(NewBB, *LI);
+        } else if (TIL->contains(DestLoop->getHeader())) {
+          // Edge from an outer loop to an inner loop.  Add to the outer loop.
+          TIL->addBasicBlockToLoop(NewBB, *LI);
+        } else if (DestLoop->contains(TIL->getHeader())) {
+          // Edge from an inner loop to an outer loop.  Add to the outer loop.
+          DestLoop->addBasicBlockToLoop(NewBB, *LI);
+        } else {
+          // Edge from two loops with no containment relation.  Because these
+          // are natural loops, we know that the destination block must be the
+          // header of its loop (adding a branch into a loop elsewhere would
+          // create an irreducible loop).
+          assert(DestLoop->getHeader() == DestBB &&
+                 "Should not create irreducible loops!");
+          if (Loop *P = DestLoop->getParentLoop())
+            P->addBasicBlockToLoop(NewBB, *LI);
+        }
+      }
+  }
+  return true;
+}
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
new file mode 100644
index 0000000..cff58ab
--- /dev/null
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -0,0 +1,485 @@
+//===- CloneFunction.cpp - Clone a function into another function ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneFunctionInto interface, which is used as the
+// low-level function cloner.  This is used by the CloneFunction and function
+// inliner to do the dirty work of copying the body of a function around.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "ValueMapper.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallVector.h"
+#include <map>
+using namespace llvm;
+
+// CloneBasicBlock - See comments in Cloning.h.  Clones every instruction of
+// BB into a fresh block (optionally appended to F), recording old->new value
+// mappings in ValueMap and accumulating summary flags into CodeInfo.
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
+                                  DenseMap<const Value*, Value*> &ValueMap,
+                                  const char *NameSuffix, Function *F,
+                                  ClonedCodeInfo *CodeInfo) {
+  BasicBlock *NewBB = new BasicBlock("", F);
+  if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+  bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+  
+  // Loop over all instructions, and copy them over.
+  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
+       II != IE; ++II) {
+    Instruction *NewInst = II->clone();
+    if (II->hasName())
+      NewInst->setName(II->getName()+NameSuffix);
+    NewBB->getInstList().push_back(NewInst);
+    ValueMap[II] = NewInst;                // Add instruction map to value.
+    
+    hasCalls |= isa<CallInst>(II);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+      // Constant-sized allocas are static; others are dynamic.
+      if (isa<ConstantInt>(AI->getArraySize()))
+        hasStaticAllocas = true;
+      else
+        hasDynamicAllocas = true;
+    }
+  }
+  
+  if (CodeInfo) {
+    CodeInfo->ContainsCalls          |= hasCalls;
+    CodeInfo->ContainsUnwinds        |= isa<UnwindInst>(BB->getTerminator());
+    CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+    // A static alloca outside the entry block behaves dynamically.
+    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && 
+                                        BB != &BB->getParent()->getEntryBlock();
+  }
+  return NewBB;
+}
+
+// Clone OldFunc into NewFunc, transforming the old arguments into references to
+// ArgMap values.  ValueMap must already map every OldFunc argument; Returns
+// collects the cloned ReturnInsts for the caller (e.g. the inliner).
+//
+void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+                             DenseMap<const Value*, Value*> &ValueMap,
+                             std::vector<ReturnInst*> &Returns,
+                             const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+  assert(NameSuffix && "NameSuffix cannot be null!");
+
+#ifndef NDEBUG
+  for (Function::const_arg_iterator I = OldFunc->arg_begin(), 
+       E = OldFunc->arg_end(); I != E; ++I)
+    assert(ValueMap.count(I) && "No mapping from source argument specified!");
+#endif
+
+  // Loop over all of the basic blocks in the function, cloning them as
+  // appropriate.  Note that we save BE this way in order to handle cloning of
+  // recursive functions into themselves.
+  //
+  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+       BI != BE; ++BI) {
+    const BasicBlock &BB = *BI;
+
+    // Create a new basic block and copy instructions into it!
+    BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc,
+                                      CodeInfo);
+    ValueMap[&BB] = CBB;                       // Add basic block mapping.
+
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
+      Returns.push_back(RI);
+  }
+
+  // Loop over all of the instructions in the function, fixing up operand
+  // references as we go.  This uses ValueMap to do all the hard work.
+  // Iteration starts at the cloned entry block (looked up through ValueMap).
+  //
+  for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]),
+         BE = NewFunc->end(); BB != BE; ++BB)
+    // Loop over all instructions, fixing each one as we find it...
+    for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+      RemapInstruction(II, ValueMap);
+}
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module.  Also, any references specified
+/// in the ValueMap are changed to refer to their mapped value instead of the
+/// original one.  If any of the arguments to the function are in the ValueMap,
+/// the arguments are deleted from the resultant function.  The ValueMap is
+/// updated to include mappings from all of the instructions and basicblocks in
+/// the function from their old to new values.
+///
+Function *llvm::CloneFunction(const Function *F,
+                              DenseMap<const Value*, Value*> &ValueMap,
+                              ClonedCodeInfo *CodeInfo) {
+  std::vector<const Type*> ArgTypes;
+
+  // The user might be deleting arguments to the function by specifying them in
+  // the ValueMap.  If so, we need to not add the arguments to the arg ty vector
+  //
+  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I)
+    if (ValueMap.count(I) == 0)  // Haven't mapped the argument to anything yet?
+      ArgTypes.push_back(I->getType());
+
+  // Create a new function type...
+  FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(),
+                                    ArgTypes, F->getFunctionType()->isVarArg());
+
+  // Create the new function...
+  Function *NewF = new Function(FTy, F->getLinkage(), F->getName());
+
+  // Loop over the arguments, copying the names of the mapped arguments over...
+  Function::arg_iterator DestI = NewF->arg_begin();
+  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+       I != E; ++I)
+    if (ValueMap.count(I) == 0) {   // Is this argument preserved?
+      DestI->setName(I->getName()); // Copy the name over...
+      ValueMap[I] = DestI++;        // Add mapping to ValueMap
+    }
+
+  std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+  CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
+  return NewF;
+}
+
+
+
+namespace {
+  /// PruningFunctionCloner - This class is a private class used to implement
+  /// the CloneAndPruneFunctionInto method.  It holds the shared cloning state
+  /// so CloneBlock and ConstantFoldMappedInstruction can cooperate.
+  struct VISIBILITY_HIDDEN PruningFunctionCloner {
+    Function *NewFunc;             // Destination function being populated.
+    const Function *OldFunc;       // Source function being cloned.
+    DenseMap<const Value*, Value*> &ValueMap;   // Old value -> new value.
+    std::vector<ReturnInst*> &Returns;          // Cloned ReturnInsts collected.
+    const char *NameSuffix;        // Suffix appended to cloned value names.
+    ClonedCodeInfo *CodeInfo;      // Optional summary of cloned code.
+    const TargetData *TD;          // Optional target info for constant folding.
+
+  public:
+    PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
+                          DenseMap<const Value*, Value*> &valueMap,
+                          std::vector<ReturnInst*> &returns,
+                          const char *nameSuffix, 
+                          ClonedCodeInfo *codeInfo,
+                          const TargetData *td)
+    : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
+      NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+    }
+
+    /// CloneBlock - The specified block is found to be reachable, clone it and
+    /// anything that it can reach.
+    void CloneBlock(const BasicBlock *BB,
+                    std::vector<const BasicBlock*> &ToClone);
+    
+  public:
+    /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+    /// mapping its operands through ValueMap if they are available.
+    Constant *ConstantFoldMappedInstruction(const Instruction *I);
+  };
+}
+
+/// CloneBlock - The specified block is found to be reachable, clone it and
+/// anything that it can reach.  Successors that remain reachable after
+/// constant folding the terminator are pushed onto ToClone for the caller's
+/// worklist; dead successors are pruned.
+void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
+                                       std::vector<const BasicBlock*> &ToClone){
+  Value *&BBEntry = ValueMap[BB];
+
+  // Have we already cloned this block?
+  if (BBEntry) return;
+  
+  // Nope, clone it now.
+  BasicBlock *NewBB;
+  BBEntry = NewBB = new BasicBlock();
+  if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+
+  bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
+  
+  // Loop over all instructions, and copy them over, DCE'ing as we go.  This
+  // loop doesn't include the terminator.
+  for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
+       II != IE; ++II) {
+    // If this instruction constant folds, don't bother cloning the instruction,
+    // instead, just add the constant to the value map.
+    if (Constant *C = ConstantFoldMappedInstruction(II)) {
+      ValueMap[II] = C;
+      continue;
+    }
+    
+    Instruction *NewInst = II->clone();
+    if (II->hasName())
+      NewInst->setName(II->getName()+NameSuffix);
+    NewBB->getInstList().push_back(NewInst);
+    ValueMap[II] = NewInst;                // Add instruction map to value.
+    
+    hasCalls |= isa<CallInst>(II);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+      if (isa<ConstantInt>(AI->getArraySize()))
+        hasStaticAllocas = true;
+      else
+        hasDynamicAllocas = true;
+    }
+  }
+  
+  // Finally, clone over the terminator.  Conditional branches and switches
+  // whose condition folds to a constant become unconditional branches, and
+  // only the taken successor is queued for cloning.
+  const TerminatorInst *OldTI = BB->getTerminator();
+  bool TerminatorDone = false;
+  if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
+    if (BI->isConditional()) {
+      // If the condition was a known constant in the callee...
+      ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
+      // Or is a known constant in the caller...
+      if (Cond == 0)  
+        Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]);
+
+      // Constant fold to uncond branch!
+      if (Cond) {
+        BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
+        ValueMap[OldTI] = new BranchInst(Dest, NewBB);
+        ToClone.push_back(Dest);
+        TerminatorDone = true;
+      }
+    }
+  } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
+    // If switching on a value known constant in the caller.
+    ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+    if (Cond == 0)  // Or known constant after constant prop in the callee...
+      Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]);
+    if (Cond) {     // Constant fold to uncond branch!
+      BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+      ValueMap[OldTI] = new BranchInst(Dest, NewBB);
+      ToClone.push_back(Dest);
+      TerminatorDone = true;
+    }
+  }
+  
+  if (!TerminatorDone) {
+    Instruction *NewInst = OldTI->clone();
+    if (OldTI->hasName())
+      NewInst->setName(OldTI->getName()+NameSuffix);
+    NewBB->getInstList().push_back(NewInst);
+    ValueMap[OldTI] = NewInst;             // Add instruction map to value.
+    
+    // Recursively clone any reachable successor blocks.
+    const TerminatorInst *TI = BB->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      ToClone.push_back(TI->getSuccessor(i));
+  }
+  
+  if (CodeInfo) {
+    CodeInfo->ContainsCalls          |= hasCalls;
+    CodeInfo->ContainsUnwinds        |= isa<UnwindInst>(OldTI);
+    CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
+    // A static alloca outside the entry block behaves dynamically.
+    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && 
+      BB != &BB->getParent()->front();
+  }
+  
+  if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
+    Returns.push_back(RI);
+}
+
+/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
+/// mapping its operands through ValueMap if they are available.  Returns the
+/// folded constant, or null if any operand fails to map to a constant.
+Constant *PruningFunctionCloner::
+ConstantFoldMappedInstruction(const Instruction *I) {
+  SmallVector<Constant*, 8> Ops;
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
+                                                           ValueMap)))
+      Ops.push_back(Op);
+    else
+      return 0;  // All operands not constant!
+
+  // Guard against &Ops[0] on an empty vector, which is undefined behavior
+  // for zero-operand instructions.
+  return ConstantFoldInstOperands(I, Ops.empty() ? 0 : &Ops[0], Ops.size(),
+                                  TD);
+}
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly.  The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead.  Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+                                     DenseMap<const Value*, Value*> &ValueMap,
+                                     std::vector<ReturnInst*> &Returns,
+                                     const char *NameSuffix, 
+                                     ClonedCodeInfo *CodeInfo,
+                                     const TargetData *TD) {
+  assert(NameSuffix && "NameSuffix cannot be null!");
+  
+#ifndef NDEBUG
+  for (Function::const_arg_iterator II = OldFunc->arg_begin(), 
+       E = OldFunc->arg_end(); II != E; ++II)
+    assert(ValueMap.count(II) && "No mapping from source argument specified!");
+#endif
+  
+  PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, 
+                            NameSuffix, CodeInfo, TD);
+
+  // Clone the entry block, and anything recursively reachable from it.
+  std::vector<const BasicBlock*> CloneWorklist;
+  CloneWorklist.push_back(&OldFunc->getEntryBlock());
+  while (!CloneWorklist.empty()) {
+    const BasicBlock *BB = CloneWorklist.back();
+    CloneWorklist.pop_back();
+    PFC.CloneBlock(BB, CloneWorklist);
+  }
+  
+  // Loop over all of the basic blocks in the old function.  If the block was
+  // reachable, we have cloned it and the old block is now in the value map:
+  // insert it into the new function in the right order.  If not, ignore it.
+  //
+  // Defer PHI resolution until rest of function is resolved.
+  std::vector<const PHINode*> PHIToResolve;
+  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
+       BI != BE; ++BI) {
+    BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
+    if (NewBB == 0) continue;  // Dead block.
+
+    // Add the new block to the new function.
+    NewFunc->getBasicBlockList().push_back(NewBB);
+    
+    // Loop over all of the instructions in the block, fixing up operand
+    // references as we go.  This uses ValueMap to do all the hard work.
+    //
+    BasicBlock::iterator I = NewBB->begin();
+    
+    // Handle PHI nodes specially, as we have to remove references to dead
+    // blocks.
+    if (PHINode *PN = dyn_cast<PHINode>(I)) {
+      // Skip over all PHI nodes, remembering them for later.
+      BasicBlock::const_iterator OldI = BI->begin();
+      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+        PHIToResolve.push_back(cast<PHINode>(OldI));
+    }
+    
+    // Otherwise, remap the rest of the instructions normally.
+    for (; I != NewBB->end(); ++I)
+      RemapInstruction(I, ValueMap);
+  }
+  
+  // Defer PHI resolution until rest of function is resolved, PHI resolution
+  // requires the CFG to be up-to-date.
+  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
+    const PHINode *OPN = PHIToResolve[phino];
+    unsigned NumPreds = OPN->getNumIncomingValues();
+    const BasicBlock *OldBB = OPN->getParent();
+    BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]);
+
+    // Map operands for blocks that are live and remove operands for blocks
+    // that are dead.
+    for (; phino != PHIToResolve.size() &&
+         PHIToResolve[phino]->getParent() == OldBB; ++phino) {
+      OPN = PHIToResolve[phino];
+      PHINode *PN = cast<PHINode>(ValueMap[OPN]);
+      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
+        if (BasicBlock *MappedBlock = 
+            cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
+          Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap);
+          assert(InVal && "Unknown input value?");
+          PN->setIncomingValue(pred, InVal);
+          PN->setIncomingBlock(pred, MappedBlock);
+        } else {
+          PN->removeIncomingValue(pred, false);
+          --pred, --e;  // Revisit the next entry.
+        }
+      } 
+    }
+    
+    // The loop above has removed PHI entries for those blocks that are dead
+    // and has updated others.  However, if a block is live (i.e. copied over)
+    // but its terminator has been changed to not go to this block, then our
+    // phi nodes will have invalid entries.  Update the PHI nodes in this
+    // case.
+    PHINode *PN = cast<PHINode>(NewBB->begin());
+    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
+    if (NumPreds != PN->getNumIncomingValues()) {
+      assert(NumPreds < PN->getNumIncomingValues());
+      // Count how many times each predecessor comes to this block.
+      std::map<BasicBlock*, unsigned> PredCount;
+      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
+           PI != E; ++PI)
+        --PredCount[*PI];
+      
+      // Figure out how many entries to remove from each PHI.
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        ++PredCount[PN->getIncomingBlock(i)];
+      
+      // At this point, the excess predecessor entries are positive in the
+      // map.  Loop over all of the PHIs and remove excess predecessor
+      // entries.
+      BasicBlock::iterator I = NewBB->begin();
+      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
+        for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(),
+             E = PredCount.end(); PCI != E; ++PCI) {
+          BasicBlock *Pred     = PCI->first;
+          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
+            PN->removeIncomingValue(Pred, false);
+        }
+      }
+    }
+    
+    // If the loops above have made these phi nodes have 0 or 1 operand,
+    // replace them with undef or the input value.  We must do this for
+    // correctness, because 0-operand phis are not valid.
+    PN = cast<PHINode>(NewBB->begin());
+    if (PN->getNumIncomingValues() == 0) {
+      BasicBlock::iterator I = NewBB->begin();
+      BasicBlock::const_iterator OldI = OldBB->begin();
+      while ((PN = dyn_cast<PHINode>(I++))) {
+        Value *NV = UndefValue::get(PN->getType());
+        PN->replaceAllUsesWith(NV);
+        assert(ValueMap[OldI] == PN && "ValueMap mismatch");
+        ValueMap[OldI] = NV;
+        PN->eraseFromParent();
+        ++OldI;
+      }
+    }
+    // NOTE: We cannot eliminate single entry phi nodes here, because of
+    // ValueMap.  Single entry phi nodes can have multiple ValueMap entries
+    // pointing at them.  Thus, deleting one would require scanning the ValueMap
+    // to update any entries in it that would require that.  This would be
+    // really slow.
+  }
+  
+  // Now that the inlined function body has been fully constructed, go through
+  // and zap unconditional fall-through branches.  This happen all the time when
+  // specializing code: code specialization turns conditional branches into
+  // uncond branches, and this code folds them.
+  Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+  while (I != NewFunc->end()) {
+    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
+    if (!BI || BI->isConditional()) { ++I; continue; }
+    
+    // Note that we can't eliminate uncond branches if the destination has
+    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
+    // require scanning the ValueMap to update any entries that point to the phi
+    // node.
+    BasicBlock *Dest = BI->getSuccessor(0);
+    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+      ++I; continue;
+    }
+    
+    // We know all single-entry PHI nodes in the inlined function have been
+    // removed, so we just need to splice the blocks.
+    BI->eraseFromParent();
+    
+    // Move all the instructions in the succ to the pred.
+    I->getInstList().splice(I->end(), Dest->getInstList());
+    
+    // Make all PHI nodes that referred to Dest now refer to I as their source.
+    Dest->replaceAllUsesWith(I);
+
+    // Remove the dest block.
+    Dest->eraseFromParent();
+    
+    // Do not increment I, iteratively merge all things this block branches to.
+  }
+}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
new file mode 100644
index 0000000..d64d58f
--- /dev/null
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -0,0 +1,124 @@
+//===- CloneModule.cpp - Clone an entire module ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneModule interface which makes a copy of an
+// entire module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/Constant.h"
+#include "ValueMapper.h"
+using namespace llvm;
+
+/// CloneModule - Return an exact copy of the specified module.  This is not as
+/// easy as it might seem because we have to worry about making copies of global
+/// variables and functions, and making their initializers and references
+/// refer to the corresponding new globals.
+///
+Module *llvm::CloneModule(const Module *M) {
+  // Create a throwaway value map recording old-entity -> new-entity
+  // correspondences for the duration of the clone.
+  DenseMap<const Value*, Value*> ValueMap;
+  return CloneModule(M, ValueMap);
+}
+
+Module *llvm::CloneModule(const Module *M,
+                          DenseMap<const Value*, Value*> &ValueMap) {
+  // First off, we need to create the new module...
+  Module *New = new Module(M->getModuleIdentifier());
+  New->setDataLayout(M->getDataLayout());
+  New->setTargetTriple(M->getTargetTriple());
+  New->setModuleInlineAsm(M->getModuleInlineAsm());
+
+  // Copy all of the type symbol table entries over.
+  const TypeSymbolTable &TST = M->getTypeSymbolTable();
+  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); 
+       TI != TE; ++TI)
+    New->addTypeName(TI->first, TI->second);
+  
+  // Copy all of the dependent libraries over.
+  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+    New->addLibrary(*I);
+
+  // Loop over all of the global variables, making corresponding globals in the
+  // new module.  Here we add them to the ValueMap and to the new Module.  We
+  // don't worry about attributes or initializers, they will come later.
+  //
+  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I)
+    ValueMap[I] = new GlobalVariable(I->getType()->getElementType(), false,
+                                     GlobalValue::ExternalLinkage, 0,
+                                     I->getName(), New);
+
+  // Loop over the functions in the module, making external functions as before
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+    Function *NF =
+      new Function(cast<FunctionType>(I->getType()->getElementType()),
+                   GlobalValue::ExternalLinkage, I->getName(), New);
+    NF->setCallingConv(I->getCallingConv());
+    ValueMap[I]= NF;
+  }
+
+  // Loop over the aliases in the module
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+                                  I->getName(), NULL, New);
+  
+  // Now that all of the things that global variable initializers can refer to
+  // have been created, loop through and copy the global variable initializers
+  // over...  We also set the attributes on the global now.
+  //
+  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I) {
+    GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
+    if (I->hasInitializer())
+      GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
+                                                 ValueMap)));
+    GV->setLinkage(I->getLinkage());
+    GV->setThreadLocal(I->isThreadLocal());
+    GV->setConstant(I->isConstant());
+  }
+
+  // Similarly, copy over function bodies now...
+  //
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+    Function *F = cast<Function>(ValueMap[I]);
+    if (!I->isDeclaration()) {
+      Function::arg_iterator DestI = F->arg_begin();
+      for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
+           ++J) {
+        DestI->setName(J->getName());
+        ValueMap[J] = DestI++;
+      }
+
+      std::vector<ReturnInst*> Returns;  // Ignore returns cloned...
+      CloneFunctionInto(F, I, ValueMap, Returns);
+    }
+
+    F->setLinkage(I->getLinkage());
+  }
+
+  // And aliases
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I) {
+    GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
+    GA->setLinkage(I->getLinkage());
+    if (const Constant* C = I->getAliasee())
+      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+  }
+  
+  return New;
+}
+
+// vim: sw=2
diff --git a/lib/Transforms/Utils/CloneTrace.cpp b/lib/Transforms/Utils/CloneTrace.cpp
new file mode 100644
index 0000000..97e57b2
--- /dev/null
+++ b/lib/Transforms/Utils/CloneTrace.cpp
@@ -0,0 +1,120 @@
+//===- CloneTrace.cpp - Clone a trace -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CloneTrace interface, which is used when writing
+// runtime optimizations. It takes a vector of basic blocks clones the basic
+// blocks, removes internal phi nodes, adds it to the same function as the
+// original (although there is no jump to it) and returns the new vector of
+// basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "ValueMapper.h"
+using namespace llvm;
+
+// Clones the trace (a vector of basic blocks), returning the cloned blocks.
+std::vector<BasicBlock *>
+llvm::CloneTrace(const std::vector<BasicBlock*> &origTrace) {
+  std::vector<BasicBlock *> clonedTrace;
+  DenseMap<const Value*, Value*> ValueMap;
+
+  //First, loop over all the Basic Blocks in the trace and copy
+  //them using CloneBasicBlock. Also fix the phi nodes during
+  //this loop. To fix the phi nodes, we delete incoming branches
+  //that are not in the trace.
+  for(std::vector<BasicBlock *>::const_iterator T = origTrace.begin(),
+    End = origTrace.end(); T != End; ++T) {
+
+    //Clone Basic Block
+    BasicBlock *clonedBlock =
+      CloneBasicBlock(*T, ValueMap, ".tr", (*T)->getParent());
+
+    //Add it to our new trace
+    clonedTrace.push_back(clonedBlock);
+
+    //Add this new mapping to our Value Map
+    ValueMap[*T] = clonedBlock;
+
+    //Loop over the phi instructions and delete operands
+    //that are from blocks not in the trace
+    //only do this if we are NOT the first block
+    if(T != origTrace.begin()) {
+      for (BasicBlock::iterator I = clonedBlock->begin();
+           isa<PHINode>(I); ++I) {
+        PHINode *PN = cast<PHINode>(I);
+        //get incoming value for the previous BB
+        Value *V = PN->getIncomingValueForBlock(*(T-1));
+        assert(V && "No incoming value from a BasicBlock in our trace!");
+
+        //remap our phi node to point to incoming value
+        ValueMap[*&I] = V;
+
+        //remove phi node
+        clonedBlock->getInstList().erase(PN);
+      }
+    }
+  }
+
+  //Second loop to do the remapping
+  for(std::vector<BasicBlock *>::const_iterator BB = clonedTrace.begin(),
+    BE = clonedTrace.end(); BB != BE; ++BB) {
+    for(BasicBlock::iterator I = (*BB)->begin(); I != (*BB)->end(); ++I) {
+
+      //Loop over all the operands of the instruction
+      for(unsigned op=0, E = I->getNumOperands(); op != E; ++op) {
+    const Value *Op = I->getOperand(op);
+
+    //Get it out of the value map
+    Value *V = ValueMap[Op];
+
+    //If not in the value map, then it's outside our trace, so ignore it
+    if(V != 0)
+      I->setOperand(op,V);
+      }
+    }
+  }
+
+  //return new vector of basic blocks
+  return clonedTrace;
+}
+
+/// CloneTraceInto - Clone the blocks of trace T into NewFunc.  The
+/// original<->clone mapping is recorded in ValueMap.
+///
+void llvm::CloneTraceInto(Function *NewFunc, Trace &T,
+                          DenseMap<const Value*, Value*> &ValueMap,
+                          const char *NameSuffix) {
+  assert(NameSuffix && "NameSuffix cannot be null!");
+
+  // Loop over all of the basic blocks in the trace, cloning them as
+  // appropriate.
+  //
+  for (Trace::const_iterator BI = T.begin(), BE = T.end(); BI != BE; ++BI) {
+    const BasicBlock *BB = *BI;
+
+    // Create a new basic block and copy instructions into it!
+    BasicBlock *CBB = CloneBasicBlock(BB, ValueMap, NameSuffix, NewFunc);
+    ValueMap[BB] = CBB;                       // Add basic block mapping.
+  }
+
+  // Loop over all of the instructions in the new function, fixing up operand
+  // references as we go.  This uses ValueMap to do all the hard work.
+  //
+  for (Function::iterator BB =
+         cast<BasicBlock>(ValueMap[T.getEntryBasicBlock()]),
+         BE = NewFunc->end(); BB != BE; ++BB)
+    // Loop over all instructions, fixing each one as we find it...
+    for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
+      RemapInstruction(II, ValueMap);
+}
+
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
new file mode 100644
index 0000000..aaf9986
--- /dev/null
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -0,0 +1,737 @@
+//===- CodeExtractor.cpp - Pull code region into a new function -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface to tear out a code region, such as an
+// individual loop or a parallel section, into a new function, replacing it with
+// a call to the new function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <set>
+using namespace llvm;
+
+// Provide a command-line option to aggregate function arguments into a struct
+// for functions produced by the code extractor. This is useful when converting
+// extracted functions to pthread-based code, as only one argument (void*) can
+// be passed in to pthread_create().
+static cl::opt<bool>
+AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
+                 cl::desc("Aggregate arguments to code-extracted functions"));
+
+namespace {
+  class VISIBILITY_HIDDEN CodeExtractor {
+    typedef std::vector<Value*> Values;
+    std::set<BasicBlock*> BlocksToExtract;  // Blocks in the region to pull out.
+    DominatorTree* DT;                      // Updated during splits if non-null.
+    bool AggregateArgs;                     // Pass args/results via one struct?
+    unsigned NumExitBlocks;                 // # of region exit blocks; ~0U until computed.
+    const Type *RetTy;                      // Return type of the extracted function.
+  public:
+    CodeExtractor(DominatorTree* dt = 0, bool AggArgs = false)
+      : DT(dt), AggregateArgs(AggArgs||AggregateArgsOpt), NumExitBlocks(~0U) {}
+
+    Function *ExtractCodeRegion(const std::vector<BasicBlock*> &code);
+
+    bool isEligible(const std::vector<BasicBlock*> &code);
+
+  private:
+    /// definedInRegion - Return true if the specified value is defined in the
+    /// extracted region.
+    bool definedInRegion(Value *V) const {
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        if (BlocksToExtract.count(I->getParent()))
+          return true;
+      return false;
+    }
+
+    /// definedInCaller - Return true if the specified value is defined in the
+    /// function being code extracted, but not in the region being extracted.
+    /// These values must be passed in as live-ins to the function.
+    bool definedInCaller(Value *V) const {
+      if (isa<Argument>(V)) return true;
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        if (!BlocksToExtract.count(I->getParent()))
+          return true;
+      return false;
+    }
+
+    void severSplitPHINodes(BasicBlock *&Header);
+    void splitReturnBlocks();
+    void findInputsOutputs(Values &inputs, Values &outputs);
+
+    Function *constructFunction(const Values &inputs,
+                                const Values &outputs,
+                                BasicBlock *header,
+                                BasicBlock *newRootNode, BasicBlock *newHeader,
+                                Function *oldFunction, Module *M);
+
+    void moveCodeToFunction(Function *newFunction);
+
+    void emitCallAndSwitchStatement(Function *newFunction,
+                                    BasicBlock *newHeader,
+                                    Values &inputs,
+                                    Values &outputs);
+
+  };
+}
+
+/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the
+/// region, we need to split the entry block of the region so that the PHI node
+/// is easier to deal with.
+void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
+  bool HasPredsFromRegion = false;
+  unsigned NumPredsOutsideRegion = 0;
+
+  if (Header != &Header->getParent()->getEntryBlock()) {
+    PHINode *PN = dyn_cast<PHINode>(Header->begin());
+    if (!PN) return;  // No PHI nodes.
+
+    // If the header node contains any PHI nodes, check to see if there is more
+    // than one entry from outside the region.  If so, we need to sever the
+    // header block into two.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+        HasPredsFromRegion = true;
+      else
+        ++NumPredsOutsideRegion;
+
+    // If there is one (or fewer) predecessor from outside the region, we don't
+    // need to do anything special.
+    if (NumPredsOutsideRegion <= 1) return;
+  }
+
+  // Otherwise, we need to split the header block into two pieces: one
+  // containing PHI nodes merging values from outside of the region, and a
+  // second that contains all of the code for the block and merges back any
+  // incoming values from inside of the region.
+  BasicBlock::iterator AfterPHIs = Header->begin();
+  while (isa<PHINode>(AfterPHIs)) ++AfterPHIs;
+  BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
+                                              Header->getName()+".ce");
+
+  // We only want to code extract the second block now, and it becomes the new
+  // header of the region.
+  BasicBlock *OldPred = Header;
+  BlocksToExtract.erase(OldPred);
+  BlocksToExtract.insert(NewBB);
+  Header = NewBB;
+
+  // Okay, update dominator sets. The blocks that dominate the new one are the
+  // blocks that dominate the old header plus the new block itself.
+  if (DT)
+    DT->splitBlock(NewBB);
+
+  // Okay, now we need to adjust the PHI nodes and any branches from within the
+  // region to go to the new header block instead of the old header block.
+  if (HasPredsFromRegion) {
+    PHINode *PN = cast<PHINode>(OldPred->begin());
+    // Loop over all of the predecessors of OldPred that are in the region,
+    // changing them to branch to NewBB instead.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+        TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator();
+        TI->replaceUsesOfWith(OldPred, NewBB);
+      }
+
+    // Okay, everything within the region is now branching to the right block,
+    // we just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+    for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
+      PHINode *PN = cast<PHINode>(AfterPHIs);
+      // Create a new PHI node in the new region, which has an incoming value
+      // from OldPred of PN.
+      PHINode *NewPN = new PHINode(PN->getType(), PN->getName()+".ce",
+                                   NewBB->begin());
+      NewPN->addIncoming(PN, OldPred);
+
+      // Loop over all of the incoming values in PN, moving them to NewPN if
+      // they are from the extracted region.
+      for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
+        if (BlocksToExtract.count(PN->getIncomingBlock(i))) {
+          NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));
+          PN->removeIncomingValue(i);
+          --i;
+        }
+      }
+    }
+  }
+}
+
+void CodeExtractor::splitReturnBlocks() {
+  for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),
+         E = BlocksToExtract.end(); I != E; ++I)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator()))
+      (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); // Isolate the return.
+}
+
+// findInputsOutputs - Find the inputs to and outputs from the code region;
+// also computes NumExitBlocks as a side effect.
+void CodeExtractor::findInputsOutputs(Values &inputs, Values &outputs) {
+  std::set<BasicBlock*> ExitBlocks;
+  for (std::set<BasicBlock*>::const_iterator ci = BlocksToExtract.begin(),
+       ce = BlocksToExtract.end(); ci != ce; ++ci) {
+    BasicBlock *BB = *ci;
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // If a used value is defined outside the region, it's an input.  If an
+      // instruction is used outside the region, it's an output.
+      for (User::op_iterator O = I->op_begin(), E = I->op_end(); O != E; ++O)
+        if (definedInCaller(*O))
+          inputs.push_back(*O);
+
+      // Consider uses of this instruction (outputs).
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+           UI != E; ++UI)
+        if (!definedInRegion(*UI)) {
+          outputs.push_back(I);
+          break;
+        }
+    } // for: insts
+
+    // Keep track of the exit blocks from the region.
+    TerminatorInst *TI = BB->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      if (!BlocksToExtract.count(TI->getSuccessor(i)))
+        ExitBlocks.insert(TI->getSuccessor(i));
+  } // for: basic blocks
+
+  NumExitBlocks = ExitBlocks.size();
+
+  // Eliminate duplicates.
+  std::sort(inputs.begin(), inputs.end());
+  inputs.erase(std::unique(inputs.begin(), inputs.end()), inputs.end());
+  std::sort(outputs.begin(), outputs.end());
+  outputs.erase(std::unique(outputs.begin(), outputs.end()), outputs.end());
+}
+
+/// constructFunction - make a function based on inputs and outputs, as follows:
+/// f(in0, ..., inN, out0, ..., outN)
+///
+Function *CodeExtractor::constructFunction(const Values &inputs,
+                                           const Values &outputs,
+                                           BasicBlock *header,
+                                           BasicBlock *newRootNode,
+                                           BasicBlock *newHeader,
+                                           Function *oldFunction,
+                                           Module *M) {
+  DOUT << "inputs: " << inputs.size() << "\n";
+  DOUT << "outputs: " << outputs.size() << "\n";
+
+  // The new function returns an exit-block selector; outputs go by reference.
+  switch (NumExitBlocks) {
+  case 0:
+  case 1: RetTy = Type::VoidTy; break;
+  case 2: RetTy = Type::Int1Ty; break;
+  default: RetTy = Type::Int16Ty; break;
+  }
+
+  std::vector<const Type*> paramTy;
+
+  // Add the types of the input values to the function's argument list
+  for (Values::const_iterator i = inputs.begin(),
+         e = inputs.end(); i != e; ++i) {
+    const Value *value = *i;
+    DOUT << "value used in func: " << *value << "\n";
+    paramTy.push_back(value->getType());
+  }
+
+  // Add the types of the output values to the function's argument list.
+  for (Values::const_iterator I = outputs.begin(), E = outputs.end();
+       I != E; ++I) {
+    DOUT << "instr used in func: " << **I << "\n";
+    if (AggregateArgs)
+      paramTy.push_back((*I)->getType());
+    else
+      paramTy.push_back(PointerType::get((*I)->getType()));
+  }
+
+  DOUT << "Function type: " << *RetTy << " f(";
+  for (std::vector<const Type*>::iterator i = paramTy.begin(),
+         e = paramTy.end(); i != e; ++i)
+    DOUT << **i << ", ";
+  DOUT << ")\n";
+
+  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+    PointerType *StructPtr = PointerType::get(StructType::get(paramTy));
+    paramTy.clear();
+    paramTy.push_back(StructPtr);
+  }
+  const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false);
+
+  // Create the new function
+  Function *newFunction = new Function(funcType,
+                                       GlobalValue::InternalLinkage,
+                                       oldFunction->getName() + "_" +
+                                       header->getName(), M);
+  newFunction->getBasicBlockList().push_back(newRootNode);
+
+  // Create an iterator to name all of the arguments we inserted.
+  Function::arg_iterator AI = newFunction->arg_begin();
+
+  // Rewrite all users of the inputs in the extracted region to use the
+  // arguments (or appropriate addressing into struct) instead.
+  for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+    Value *RewriteVal;
+    if (AggregateArgs) {
+      Value *Idx0 = Constant::getNullValue(Type::Int32Ty);
+      Value *Idx1 = ConstantInt::get(Type::Int32Ty, i);
+      std::string GEPname = "gep_" + inputs[i]->getName();
+      TerminatorInst *TI = newFunction->begin()->getTerminator();
+      GetElementPtrInst *GEP = new GetElementPtrInst(AI, Idx0, Idx1, 
+                                                     GEPname, TI);
+      RewriteVal = new LoadInst(GEP, "load" + GEPname, TI);
+    } else
+      RewriteVal = AI++;
+
+    std::vector<User*> Users(inputs[i]->use_begin(), inputs[i]->use_end());
+    for (std::vector<User*>::iterator use = Users.begin(), useE = Users.end();
+         use != useE; ++use)
+      if (Instruction* inst = dyn_cast<Instruction>(*use))
+        if (BlocksToExtract.count(inst->getParent()))
+          inst->replaceUsesOfWith(inputs[i], RewriteVal);
+  }
+
+  // Set names for input and output arguments.
+  if (!AggregateArgs) {
+    AI = newFunction->arg_begin();
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
+      AI->setName(inputs[i]->getName());
+    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
+      AI->setName(outputs[i]->getName()+".out");
+  }
+
+  // Rewrite branches to basic blocks outside of the region to new dummy blocks
+  // within the new function. This must be done before we lose track of which
+  // blocks were originally in the code region.
+  std::vector<User*> Users(header->use_begin(), header->use_end());
+  for (unsigned i = 0, e = Users.size(); i != e; ++i)
+    // The BasicBlock which contains the branch is not in the region
+    // modify the branch target to a new block
+    if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i]))
+      if (!BlocksToExtract.count(TI->getParent()) &&
+          TI->getParent()->getParent() == oldFunction)
+        TI->replaceUsesOfWith(header, newHeader);
+
+  return newFunction;
+}
+
+/// emitCallAndSwitchStatement - This method sets up the caller side by adding
+/// the call instruction, splitting any PHI nodes in the header block as
+/// necessary.
+void CodeExtractor::
+emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
+                           Values &inputs, Values &outputs) {
+  // Emit a call to the new function, passing in: *pointer to struct (if
+  // aggregating parameters), or plain inputs and allocated memory for outputs
+  std::vector<Value*> params, StructValues, ReloadOutputs;
+
+  // Add inputs as params, or to be filled into the struct
+  for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i)
+    if (AggregateArgs)
+      StructValues.push_back(*i);
+    else
+      params.push_back(*i);
+
+  // Create allocas for the outputs
+  for (Values::iterator i = outputs.begin(), e = outputs.end(); i != e; ++i) {
+    if (AggregateArgs) {
+      StructValues.push_back(*i);
+    } else {
+      // Outputs are passed by reference: allocate a slot in the caller's
+      // entry block and hand its address to the new function.
+      AllocaInst *alloca =
+        new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+                       codeReplacer->getParent()->begin()->begin());
+      ReloadOutputs.push_back(alloca);
+      params.push_back(alloca);
+    }
+  }
+
+  AllocaInst *Struct = 0;
+  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+    std::vector<const Type*> ArgTypes;
+    for (Values::iterator v = StructValues.begin(),
+           ve = StructValues.end(); v != ve; ++v)
+      ArgTypes.push_back((*v)->getType());
+
+    // Allocate a struct at the beginning of this function
+    Type *StructArgTy = StructType::get(ArgTypes);
+    Struct =
+      new AllocaInst(StructArgTy, 0, "structArg",
+                     codeReplacer->getParent()->begin()->begin());
+    params.push_back(Struct);
+
+    // Store each input value into its field of the struct before the call.
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+      Value *Idx0 = Constant::getNullValue(Type::Int32Ty);
+      Value *Idx1 = ConstantInt::get(Type::Int32Ty, i);
+      GetElementPtrInst *GEP =
+        new GetElementPtrInst(Struct, Idx0, Idx1,
+                              "gep_" + StructValues[i]->getName());
+      codeReplacer->getInstList().push_back(GEP);
+      StoreInst *SI = new StoreInst(StructValues[i], GEP);
+      codeReplacer->getInstList().push_back(SI);
+    }
+  }
+
+  // Emit the call to the function
+  // NOTE(review): if the region has no inputs or outputs (and no aggregate),
+  // params is empty and &params[0] is technically undefined — confirm callers
+  // never reach here with an empty parameter list.
+  CallInst *call = new CallInst(newFunction, &params[0], params.size(),
+                                NumExitBlocks > 1 ? "targetBlock" : "");
+  codeReplacer->getInstList().push_back(call);
+
+  // When not aggregating, the output pointer arguments immediately follow
+  // the input arguments in the new function's argument list.
+  Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
+  unsigned FirstOut = inputs.size();
+  if (!AggregateArgs)
+    std::advance(OutputArgBegin, inputs.size());
+
+  // Reload the outputs passed in by reference
+  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+    Value *Output = 0;
+    if (AggregateArgs) {
+      Value *Idx0 = Constant::getNullValue(Type::Int32Ty);
+      Value *Idx1 = ConstantInt::get(Type::Int32Ty, FirstOut + i);
+      GetElementPtrInst *GEP
+        = new GetElementPtrInst(Struct, Idx0, Idx1,
+                                "gep_reload_" + outputs[i]->getName());
+      codeReplacer->getInstList().push_back(GEP);
+      Output = GEP;
+    } else {
+      Output = ReloadOutputs[i];
+    }
+    LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload");
+    codeReplacer->getInstList().push_back(load);
+    // Rewrite uses of the output value that remain OUTSIDE the extracted
+    // region to use the reloaded value instead.
+    std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());
+    for (unsigned u = 0, e = Users.size(); u != e; ++u) {
+      Instruction *inst = cast<Instruction>(Users[u]);
+      if (!BlocksToExtract.count(inst->getParent()))
+        inst->replaceUsesOfWith(outputs[i], load);
+    }
+  }
+
+  // Now we can emit a switch statement using the call as a value.
+  SwitchInst *TheSwitch =
+    new SwitchInst(ConstantInt::getNullValue(Type::Int16Ty),
+                   codeReplacer, 0, codeReplacer);
+
+  // Since there may be multiple exits from the original region, make the new
+  // function return an unsigned, switch on that number.  This loop iterates
+  // over all of the blocks in the extracted region, updating any terminator
+  // instructions in the to-be-extracted region that branch to blocks that are
+  // not in the region to be extracted.
+  std::map<BasicBlock*, BasicBlock*> ExitBlockMap;
+
+  unsigned switchVal = 0;
+  for (std::set<BasicBlock*>::const_iterator i = BlocksToExtract.begin(),
+         e = BlocksToExtract.end(); i != e; ++i) {
+    TerminatorInst *TI = (*i)->getTerminator();
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      if (!BlocksToExtract.count(TI->getSuccessor(i))) {
+        BasicBlock *OldTarget = TI->getSuccessor(i);
+        // add a new basic block which returns the appropriate value
+        BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+        if (!NewTarget) {
+          // If we don't already have an exit stub for this non-extracted
+          // destination, create one now!
+          NewTarget = new BasicBlock(OldTarget->getName() + ".exitStub",
+                                     newFunction);
+          unsigned SuccNum = switchVal++;
+
+          // Pick the value the stub returns, based on how many distinct
+          // exits the region has (void / bool / i16 exit number).
+          Value *brVal = 0;
+          switch (NumExitBlocks) {
+          case 0:
+          case 1: break;  // No value needed.
+          case 2:         // Conditional branch, return a bool
+            brVal = ConstantInt::get(Type::Int1Ty, !SuccNum);
+            break;
+          default:
+            brVal = ConstantInt::get(Type::Int16Ty, SuccNum);
+            break;
+          }
+
+          ReturnInst *NTRet = new ReturnInst(brVal, NewTarget);
+
+          // Update the switch instruction.
+          TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum),
+                             OldTarget);
+
+          // Restore values just before we exit
+          Function::arg_iterator OAI = OutputArgBegin;
+          for (unsigned out = 0, e = outputs.size(); out != e; ++out) {
+            // For an invoke, the normal destination is the only one that is
+            // dominated by the result of the invocation
+            BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent();
+
+            bool DominatesDef = true;
+
+            if (InvokeInst *Invoke = dyn_cast<InvokeInst>(outputs[out])) {
+              DefBlock = Invoke->getNormalDest();
+
+              // Make sure we are looking at the original successor block, not
+              // at a newly inserted exit block, which won't be in the dominator
+              // info.
+              for (std::map<BasicBlock*, BasicBlock*>::iterator I =
+                     ExitBlockMap.begin(), E = ExitBlockMap.end(); I != E; ++I)
+                if (DefBlock == I->second) {
+                  DefBlock = I->first;
+                  break;
+                }
+
+              // In the extract block case, if the block we are extracting ends
+              // with an invoke instruction, make sure that we don't emit a
+              // store of the invoke value for the unwind block.
+              if (!DT && DefBlock != OldTarget)
+                DominatesDef = false;
+            }
+
+            if (DT)
+              DominatesDef = DT->dominates(DefBlock, OldTarget);
+
+            if (DominatesDef) {
+              if (AggregateArgs) {
+                Value *Idx0 = Constant::getNullValue(Type::Int32Ty);
+                Value *Idx1 = ConstantInt::get(Type::Int32Ty,FirstOut+out);
+                GetElementPtrInst *GEP =
+                  new GetElementPtrInst(OAI, Idx0, Idx1,
+                                        "gep_" + outputs[out]->getName(),
+                                        NTRet);
+                new StoreInst(outputs[out], GEP, NTRet);
+              } else {
+                new StoreInst(outputs[out], OAI, NTRet);
+              }
+            }
+            // Advance output iterator even if we don't emit a store
+            if (!AggregateArgs) ++OAI;
+          }
+        }
+
+        // rewrite the original branch instruction with this new target
+        TI->setSuccessor(i, NewTarget);
+      }
+  }
+
+  // Now that we've done the deed, simplify the switch instruction.
+  // Note: successor #0 of the switch is its default destination.
+  const Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType();
+  switch (NumExitBlocks) {
+  case 0:
+    // There are no successors (the block containing the switch itself), which
+    // means that previously this was the last part of the function, and hence
+    // this should be rewritten as a `ret'
+
+    // Check if the function should return a value
+    if (OldFnRetTy == Type::VoidTy) {
+      new ReturnInst(0, TheSwitch);  // Return void
+    } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
+      // return what we have
+      new ReturnInst(TheSwitch->getCondition(), TheSwitch);
+    } else {
+      // Otherwise we must have code extracted an unwind or something, just
+      // return whatever we want.
+      new ReturnInst(Constant::getNullValue(OldFnRetTy), TheSwitch);
+    }
+
+    TheSwitch->getParent()->getInstList().erase(TheSwitch);
+    break;
+  case 1:
+    // Only a single destination, change the switch into an unconditional
+    // branch.
+    new BranchInst(TheSwitch->getSuccessor(1), TheSwitch);
+    TheSwitch->getParent()->getInstList().erase(TheSwitch);
+    break;
+  case 2:
+    new BranchInst(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2),
+                   call, TheSwitch);
+    TheSwitch->getParent()->getInstList().erase(TheSwitch);
+    break;
+  default:
+    // Otherwise, make the default destination of the switch instruction be one
+    // of the other successors.
+    TheSwitch->setOperand(0, call);
+    TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
+    TheSwitch->removeCase(NumExitBlocks);  // Remove redundant case
+    break;
+  }
+}
+
+/// moveCodeToFunction - Transfer every block in the extracted region out of
+/// its original function and into the body of newFunction.
+void CodeExtractor::moveCodeToFunction(Function *newFunction) {
+  // All extracted blocks currently share one parent; grab it from any member.
+  Function *oldFunc = (*BlocksToExtract.begin())->getParent();
+  Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
+  Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+
+  std::set<BasicBlock*>::const_iterator BBI = BlocksToExtract.begin();
+  std::set<BasicBlock*>::const_iterator BBE = BlocksToExtract.end();
+  while (BBI != BBE) {
+    BasicBlock *Block = *BBI++;
+    // Unlink the block from the old function's list of blocks...
+    oldBlocks.remove(Block);
+    // ...and append it to the new function.
+    newBlocks.push_back(Block);
+  }
+}
+
+/// ExtractCodeRegion - Removes a loop from a function, replaces it with a call
+/// to a new function. Returns pointer to the new function.
+///
+/// algorithm:
+///
+/// find inputs and outputs for the region
+///
+/// for inputs: add to function as args, map input instr* to arg#
+/// for outputs: add allocas for scalars,
+///             add to func as args, map output instr* to arg#
+///
+/// rewrite func to use argument #s instead of instr*
+///
+/// for each scalar output in the function: at every exit, store intermediate
+/// computed result back into memory.
+///
+Function *CodeExtractor::
+ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
+  if (!isEligible(code))
+    return 0;
+
+  // 1) Find inputs, outputs
+  // 2) Construct new function
+  //  * Add allocas for defs, pass as args by reference
+  //  * Pass in uses as args
+  // 3) Move code region, add call instr to func
+  //
+  BlocksToExtract.insert(code.begin(), code.end());
+
+  Values inputs, outputs;
+
+  // Assumption: this is a single-entry code region, and the header is the first
+  // block in the region.
+  BasicBlock *header = code[0];
+
+  // Sanity check: no block other than the header may have predecessors
+  // outside the region (debug builds only).
+  for (unsigned i = 1, e = code.size(); i != e; ++i)
+    for (pred_iterator PI = pred_begin(code[i]), E = pred_end(code[i]);
+         PI != E; ++PI)
+      assert(BlocksToExtract.count(*PI) &&
+             "No blocks in this region may have entries from outside the region"
+             " except for the first block!");
+
+  // If we have to split PHI nodes or the entry block, do so now.
+  severSplitPHINodes(header);
+
+  // If we have any return instructions in the region, split those blocks so
+  // that the return is not in the region.
+  splitReturnBlocks();
+
+  Function *oldFunction = header->getParent();
+
+  // This takes the place of the original loop
+  BasicBlock *codeReplacer = new BasicBlock("codeRepl", oldFunction, header);
+
+  // The new function needs a root node because other nodes can branch to the
+  // head of the region, but the entry node of a function cannot have preds.
+  BasicBlock *newFuncRoot = new BasicBlock("newFuncRoot");
+  newFuncRoot->getInstList().push_back(new BranchInst(header));
+
+  // Find inputs to, outputs from the code region.
+  findInputsOutputs(inputs, outputs);
+
+  // Construct new function based on inputs/outputs & add allocas for all defs.
+  Function *newFunction = constructFunction(inputs, outputs, header,
+                                            newFuncRoot,
+                                            codeReplacer, oldFunction,
+                                            oldFunction->getParent());
+
+  emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
+
+  moveCodeToFunction(newFunction);
+
+  // Loop over all of the PHI nodes in the header block, and change any
+  // references to the old incoming edge to be the new incoming edge.
+  for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BlocksToExtract.count(PN->getIncomingBlock(i)))
+        PN->setIncomingBlock(i, newFuncRoot);
+  }
+
+  // Look at all successors of the codeReplacer block.  If any of these blocks
+  // had PHI nodes in them, we need to update the "from" block to be the code
+  // replacer, not the original block in the extracted region.
+  std::vector<BasicBlock*> Succs(succ_begin(codeReplacer),
+                                 succ_end(codeReplacer));
+  for (unsigned i = 0, e = Succs.size(); i != e; ++i)
+    for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {
+      PHINode *PN = cast<PHINode>(I);
+      std::set<BasicBlock*> ProcessedPreds;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (BlocksToExtract.count(PN->getIncomingBlock(i)))
+          if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)
+            PN->setIncomingBlock(i, codeReplacer);
+          else {
+            // There were multiple entries in the PHI for this block, now there
+            // is only one, so remove the duplicated entries.
+            PN->removeIncomingValue(i, false);
+            --i; --e;
+          }
+    }
+
+  //cerr << "NEW FUNCTION: " << *newFunction;
+  //  verifyFunction(*newFunction);
+
+  //  cerr << "OLD FUNCTION: " << *oldFunction;
+  //  verifyFunction(*oldFunction);
+
+  // In debug builds, abort immediately if the new function is malformed.
+  DEBUG(if (verifyFunction(*newFunction)) abort());
+  return newFunction;
+}
+
+/// isEligible - A region is rejected if any block contains an alloca or a
+/// call to the llvm.va_start intrinsic; extracting either would change the
+/// behavior of the original function.
+bool CodeExtractor::isEligible(const std::vector<BasicBlock*> &code) {
+  for (std::vector<BasicBlock*>::const_iterator BB = code.begin(),
+         BBE = code.end(); BB != BBE; ++BB) {
+    for (BasicBlock::const_iterator I = (*BB)->begin(), IE = (*BB)->end();
+         I != IE; ++I) {
+      if (isa<AllocaInst>(*I))
+        return false;
+      if (const CallInst *CI = dyn_cast<CallInst>(I))
+        if (const Function *Callee = CI->getCalledFunction())
+          if (Callee->getIntrinsicID() == Intrinsic::vastart)
+            return false;
+    }
+  }
+  return true;
+}
+
+
+/// ExtractCodeRegion - slurp a sequence of basic blocks into a brand new
+/// function
+///
+Function* llvm::ExtractCodeRegion(DominatorTree &DT,
+                                  const std::vector<BasicBlock*> &code,
+                                  bool AggregateArgs) {
+  CodeExtractor Extractor(&DT, AggregateArgs);
+  return Extractor.ExtractCodeRegion(code);
+}
+
+/// ExtractLoop - slurp a natural loop into a brand new function
+///
+Function* llvm::ExtractLoop(DominatorTree &DT, Loop *L, bool AggregateArgs) {
+  return CodeExtractor(&DT, AggregateArgs).ExtractCodeRegion(L->getBlocks());
+}
+
+/// ExtractBasicBlock - slurp a basic block into a brand new function
+///
+Function* llvm::ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs) {
+  // Package the single block in the vector form the extractor expects.
+  std::vector<BasicBlock*> Blocks(1, BB);
+  return CodeExtractor(0, AggregateArgs).ExtractCodeRegion(Blocks);
+}
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
new file mode 100644
index 0000000..df332b2
--- /dev/null
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -0,0 +1,133 @@
+//===- DemoteRegToStack.cpp - Move a virtual register to the stack --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the function DemoteRegToStack().  This function takes a
+// virtual register computed by an Instruction and replaces it with a slot in
+// the stack frame, allocated via alloca. It returns the pointer to the
+// AllocaInst inserted.  After this function is called on an instruction, we are
+// guaranteed that the only user of the instruction is a store that is
+// immediately after it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include <map>
+using namespace llvm;
+
+/// DemoteRegToStack - This function takes a virtual register computed by an
+/// Instruction and replaces it with a slot in the stack frame, allocated via
+/// alloca.  This allows the CFG to be changed around without fear of
+/// invalidating the SSA information for the value.  It returns the pointer to
+/// the alloca inserted to create a stack slot for I.
+///
+AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads) {
+  if (I.use_empty()) return 0;                // nothing to do!
+
+  // Create a stack slot to hold the value.
+  Function *F = I.getParent()->getParent();
+  AllocaInst *Slot = new AllocaInst(I.getType(), 0, I.getName(),
+                                    F->getEntryBlock().begin());
+
+  // Change all of the users of the instruction to read from the stack slot
+  // instead.
+  while (!I.use_empty()) {
+    Instruction *U = cast<Instruction>(I.use_back());
+    if (PHINode *PN = dyn_cast<PHINode>(U)) {
+      // If this is a PHI node, we can't insert a load of the value before the
+      // use.  Instead, insert the load in the predecessor block corresponding
+      // to the incoming value.
+      //
+      // Note that if there are multiple edges from a basic block to this PHI
+      // node, we cannot insert multiple loads.  The problem is that the
+      // resultant PHI node would have multiple values (from each load) coming
+      // in from the same block, which is illegal SSA form.  For this reason,
+      // we keep track of and reuse loads we insert.
+      std::map<BasicBlock*, Value*> Loads;
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        if (PN->getIncomingValue(i) == &I) {
+          Value *&V = Loads[PN->getIncomingBlock(i)];
+          if (V == 0) {
+            // Insert the load into the predecessor block
+            V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, 
+                             PN->getIncomingBlock(i)->getTerminator());
+          }
+          PN->setIncomingValue(i, V);
+        }
+
+    } else {
+      // If this is a normal instruction, just insert a load.
+      Value *V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, U);
+      U->replaceUsesOfWith(&I, V);
+    }
+  }
+
+
+  // Insert stores of the computed value into the stack slot.  We have to be
+  // careful if I is an invoke instruction though, because we can't insert the
+  // store AFTER the terminator instruction.
+  BasicBlock::iterator InsertPt;
+  if (!isa<TerminatorInst>(I)) {
+    InsertPt = &I;
+    ++InsertPt;
+  } else {
+    // We cannot demote invoke instructions to the stack if their normal edge
+    // is critical.
+    InvokeInst &II = cast<InvokeInst>(I);
+    assert(II.getNormalDest()->getSinglePredecessor() &&
+           "Cannot demote invoke with a critical successor!");
+    InsertPt = II.getNormalDest()->begin();
+  }
+
+  for (; isa<PHINode>(InsertPt); ++InsertPt)
+  /* empty */;   // Don't insert before any PHI nodes.
+  new StoreInst(&I, Slot, InsertPt);
+
+  return Slot;
+}
+
+
+/// DemotePHIToStack - This function takes a virtual register computed by a phi
+/// node and replaces it with a slot in the stack frame, allocated via alloca.
+/// The phi node is deleted and it returns the pointer to the alloca inserted.
+AllocaInst* llvm::DemotePHIToStack(PHINode *P) {
+  if (P->use_empty()) {
+    P->eraseFromParent();    // Dead phi: no slot needed, just remove it.
+    return 0;
+  }
+
+  // Create a stack slot to hold the value.
+  Function *F = P->getParent()->getParent();
+  AllocaInst *Slot = new AllocaInst(P->getType(), 0, P->getName(),
+                                    F->getEntryBlock().begin());
+
+  // Iterate over each operand, inserting a store of the incoming value at the
+  // end of the corresponding predecessor block.
+  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
+      assert(II->getParent() != P->getIncomingBlock(i) && 
+             "Invoke edge not supported yet");
+    }
+    new StoreInst(P->getIncomingValue(i), Slot, 
+                  P->getIncomingBlock(i)->getTerminator());
+  }
+
+  // Insert the reload after the PHI nodes in this block and replace all uses.
+  // The load must not be placed before P itself: PHI nodes have to be grouped
+  // at the top of a basic block, so a load inserted among them would produce
+  // invalid IR.  (Previously InsertPt was computed but never used.)
+  BasicBlock::iterator InsertPt;
+  for (InsertPt = P->getParent()->getInstList().begin(); 
+       isa<PHINode>(InsertPt); ++InsertPt);
+  Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt);
+  P->replaceAllUsesWith(V);
+  
+  // Delete phi.
+  P->eraseFromParent();
+  
+  return Slot;
+}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
new file mode 100644
index 0000000..9735a2f
--- /dev/null
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -0,0 +1,496 @@
+//===- InlineFunction.cpp - Code to perform function inlining -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements inlining of a function into a call site, resolving
+// parameters and the return value as appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
+using namespace llvm;
+
+/// InlineFunction (CallInst overload) - wrap the call in a generic CallSite
+/// and defer to the main CallSite-based implementation.
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
+  CallSite Site(CI);
+  return InlineFunction(Site, CG, TD);
+}
+/// InlineFunction (InvokeInst overload) - wrap the invoke in a generic
+/// CallSite and defer to the main CallSite-based implementation.
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
+  CallSite Site(II);
+  return InlineFunction(Site, CG, TD);
+}
+
+/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
+/// in the body of the inlined function into invokes and turn unwind
+/// instructions into branches to the invoke unwind dest.
+///
+/// II is the invoke instruction being inlined.  FirstNewBlock is the first
+/// block of the inlined code (the last block is the end of the function),
+/// and InlineCodeInfo is information about the code that got inlined.
+static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
+                                ClonedCodeInfo &InlinedCodeInfo) {
+  BasicBlock *InvokeDest = II->getUnwindDest();
+  std::vector<Value*> InvokeDestPHIValues;
+
+  // If there are PHI nodes in the unwind destination block, we need to
+  // keep track of which values came into them from this invoke, then remove
+  // the entry for this block.
+  BasicBlock *InvokeBlock = II->getParent();
+  for (BasicBlock::iterator I = InvokeDest->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    // Save the value to use for this edge.
+    InvokeDestPHIValues.push_back(PN->getIncomingValueForBlock(InvokeBlock));
+  }
+
+  Function *Caller = FirstNewBlock->getParent();
+  
+  // The inlined code is currently at the end of the function, scan from the
+  // start of the inlined code to its end, checking for stuff we need to
+  // rewrite.
+  if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
+    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+         BB != E; ++BB) {
+      if (InlinedCodeInfo.ContainsCalls) {
+        for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
+          Instruction *I = BBI++;
+          
+          // We only need to check for function calls: inlined invoke
+          // instructions require no special handling.
+          if (!isa<CallInst>(I)) continue;
+          CallInst *CI = cast<CallInst>(I);
+
+          // If this is an intrinsic function call or an inline asm, don't
+          // convert it to an invoke.
+          if ((CI->getCalledFunction() &&
+               CI->getCalledFunction()->getIntrinsicID()) ||
+              isa<InlineAsm>(CI->getCalledValue()))
+            continue;
+          
+          // Convert this function call into an invoke instruction.
+          // First, split the basic block.
+          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+          
+          // Next, create the new invoke instruction, inserting it at the end
+          // of the old basic block.
+          SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+          InvokeInst *II =
+            new InvokeInst(CI->getCalledValue(), Split, InvokeDest,
+                           &InvokeArgs[0], InvokeArgs.size(),
+                           CI->getName(), BB->getTerminator());
+          II->setCallingConv(CI->getCallingConv());
+          
+          // Make sure that anything using the call now uses the invoke!
+          CI->replaceAllUsesWith(II);
+          
+          // Delete the unconditional branch inserted by splitBasicBlock
+          BB->getInstList().pop_back();
+          Split->getInstList().pop_front();  // Delete the original call
+          
+          // Update any PHI nodes in the exceptional block to indicate that
+          // there is now a new entry in them.
+          unsigned i = 0;
+          for (BasicBlock::iterator I = InvokeDest->begin();
+               isa<PHINode>(I); ++I, ++i) {
+            PHINode *PN = cast<PHINode>(I);
+            PN->addIncoming(InvokeDestPHIValues[i], BB);
+          }
+            
+          // This basic block is now complete, start scanning the next one.
+          // (The remainder of this block is now in Split, which the outer
+          // loop will visit later.)
+          break;
+        }
+      }
+      
+      if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+        // An UnwindInst requires special handling when it gets inlined into an
+        // invoke site.  Once this happens, we know that the unwind would cause
+        // a control transfer to the invoke exception destination, so we can
+        // transform it into a direct branch to the exception destination.
+        new BranchInst(InvokeDest, UI);
+        
+        // Delete the unwind instruction!
+        UI->getParent()->getInstList().pop_back();
+        
+        // Update any PHI nodes in the exceptional block to indicate that
+        // there is now a new entry in them.
+        unsigned i = 0;
+        for (BasicBlock::iterator I = InvokeDest->begin();
+             isa<PHINode>(I); ++I, ++i) {
+          PHINode *PN = cast<PHINode>(I);
+          PN->addIncoming(InvokeDestPHIValues[i], BB);
+        }
+      }
+    }
+  }
+
+  // Now that everything is happy, we have one final detail.  The PHI nodes in
+  // the exception destination block still have entries due to the original
+  // invoke instruction.  Eliminate these entries (which might even delete the
+  // PHI node) now.
+  InvokeDest->removePredecessor(II->getParent());
+}
+
+/// UpdateCallGraphAfterInlining - Once we have cloned code over from a callee
+/// into the caller, update the specified callgraph to reflect the changes we
+/// made.  Note that it's possible that not all code was copied over, so only
+/// some edges of the callgraph will remain.
+static void UpdateCallGraphAfterInlining(const Function *Caller,
+                                         const Function *Callee,
+                                         Function::iterator FirstNewBlock,
+                                       DenseMap<const Value*, Value*> &ValueMap,
+                                         CallGraph &CG) {
+  // Update the call graph by deleting the edge from Callee to Caller
+  CallGraphNode *CalleeNode = CG[Callee];
+  CallGraphNode *CallerNode = CG[Caller];
+  CallerNode->removeCallEdgeTo(CalleeNode);
+  
+  // Since we inlined some uninlined call sites in the callee into the caller,
+  // add edges from the caller to all of the callees of the callee.
+  for (CallGraphNode::iterator I = CalleeNode->begin(),
+       E = CalleeNode->end(); I != E; ++I) {
+    const Instruction *OrigCall = I->first.getInstruction();
+    
+    // Map the callee's call site through the cloning value map to find its
+    // copy in the caller, if any.
+    DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
+    // Only copy the edge if the call was inlined!
+    if (VMI != ValueMap.end() && VMI->second) {
+      // If the call was inlined, but then constant folded, there is no edge to
+      // add.  Check for this case.
+      if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
+        CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
+    }
+  }
+}
+
+
+// InlineFunction - This function inlines the called function into the basic
+// block of the caller.  This returns false if it is not possible to inline this
+// call.  The program is still in a well defined state if this occurs though.
+//
+// Note that this only does one level of inlining.  For example, if the
+// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+// exists in the instruction stream.  Similarly this will inline a recursive
+// function by one level.
+//
+bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
+  Instruction *TheCall = CS.getInstruction();
+  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
+         "Instruction not in function!");
+
+  const Function *CalledFunc = CS.getCalledFunction();
+  if (CalledFunc == 0 ||          // Can't inline external function or indirect
+      CalledFunc->isDeclaration() || // call, or call to a vararg function!
+      CalledFunc->getFunctionType()->isVarArg()) return false;
+
+
+  // If the call to the callee is a non-tail call, we must clear the 'tail'
+  // flags on any calls that we inline.
+  bool MustClearTailCallFlags =
+    isa<CallInst>(TheCall) && !cast<CallInst>(TheCall)->isTailCall();
+
+  BasicBlock *OrigBB = TheCall->getParent();
+  Function *Caller = OrigBB->getParent();
+
+  // Get an iterator to the last basic block in the function, which will have
+  // the new function inlined after it.
+  //
+  Function::iterator LastBlock = &Caller->back();
+
+  // Make sure to capture all of the return instructions from the cloned
+  // function.
+  std::vector<ReturnInst*> Returns;
+  ClonedCodeInfo InlinedFunctionInfo;
+  Function::iterator FirstNewBlock;
+  
+  { // Scope to destroy ValueMap after cloning.
+    DenseMap<const Value*, Value*> ValueMap;
+
+    // Calculate the vector of arguments to pass into the function cloner, which
+    // matches up the formal to the actual argument values.
+    assert(std::distance(CalledFunc->arg_begin(), CalledFunc->arg_end()) ==
+           std::distance(CS.arg_begin(), CS.arg_end()) &&
+           "No varargs calls can be inlined!");
+    CallSite::arg_iterator AI = CS.arg_begin();
+    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+           E = CalledFunc->arg_end(); I != E; ++I, ++AI)
+      ValueMap[I] = *AI;
+
+    // We want the inliner to prune the code as it copies.  We would LOVE to
+    // have no dead or constant instructions leftover after inlining occurs
+    // (which can happen, e.g., because an argument was constant), but we'll be
+    // happy with whatever the cloner can do.
+    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
+                              &InlinedFunctionInfo, TD);
+    
+    // Remember the first block that is newly cloned over.  The cloner appended
+    // the new blocks after the (previously) last block of the caller, so the
+    // successor of LastBlock is the first cloned block.
+    FirstNewBlock = LastBlock; ++FirstNewBlock;
+    
+    // Update the callgraph if requested.
+    if (CG)
+      UpdateCallGraphAfterInlining(Caller, CalledFunc, FirstNewBlock, ValueMap,
+                                   *CG);
+  }
+ 
+  // If there are any alloca instructions in the block that used to be the entry
+  // block for the callee, move them to the entry block of the caller.  First
+  // calculate which instruction they should be inserted before.  We insert the
+  // instructions at the end of the current alloca list.
+  //
+  // NOTE(review): InsertPoint below is the *first* instruction of the caller's
+  // entry block, so the spliced allocas actually land at the very top of the
+  // entry block, ahead of any pre-existing allocas -- the "end of the current
+  // alloca list" wording above looks stale; confirm intent.
+  {
+    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
+    for (BasicBlock::iterator I = FirstNewBlock->begin(),
+           E = FirstNewBlock->end(); I != E; )
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
+        // If the alloca is now dead, remove it.  This often occurs due to code
+        // specialization.
+        if (AI->use_empty()) {
+          AI->eraseFromParent();
+          continue;
+        }
+        
+        // Only constant-size (static) allocas are hoisted; dynamic allocas
+        // stay put and are bracketed by stacksave/stackrestore below.
+        if (isa<Constant>(AI->getArraySize())) {
+          // Scan for the block of allocas that we can move over, and move them
+          // all at once.
+          while (isa<AllocaInst>(I) &&
+                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
+            ++I;
+
+          // Transfer all of the allocas over in a block.  Using splice means
+          // that the instructions aren't removed from the symbol table, then
+          // reinserted.
+          Caller->getEntryBlock().getInstList().splice(
+              InsertPoint,
+              FirstNewBlock->getInstList(),
+              AI, I);
+        }
+      }
+  }
+
+  // If the inlined code contained dynamic alloca instructions, wrap the inlined
+  // code with llvm.stacksave/llvm.stackrestore intrinsics.
+  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
+    Module *M = Caller->getParent();
+    const Type *BytePtr = PointerType::get(Type::Int8Ty);
+    // Get the two intrinsics we care about.
+    Constant *StackSave, *StackRestore;
+    StackSave    = M->getOrInsertFunction("llvm.stacksave", BytePtr, NULL);
+    StackRestore = M->getOrInsertFunction("llvm.stackrestore", Type::VoidTy,
+                                          BytePtr, NULL);
+
+    // If we are preserving the callgraph, add edges to the stacksave/restore
+    // functions for the calls we insert.
+    CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
+    if (CG) {
+      // We know that StackSave/StackRestore are Function*'s, because they are
+      // intrinsics which must have the right types.
+      StackSaveCGN    = CG->getOrInsertFunction(cast<Function>(StackSave));
+      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
+      CallerNode = (*CG)[Caller];
+    }
+      
+    // Insert the llvm.stacksave.
+    CallInst *SavedPtr = new CallInst(StackSave, "savedstack", 
+                                      FirstNewBlock->begin());
+    if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);
+      
+    // Insert a call to llvm.stackrestore before any return instructions in the
+    // inlined function.
+    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+      CallInst *CI = new CallInst(StackRestore, SavedPtr, "", Returns[i]);
+      if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
+    }
+
+    // Count the number of StackRestore calls we insert.
+    unsigned NumStackRestores = Returns.size();
+    
+    // If we are inlining an invoke instruction, insert restores before each
+    // unwind.  These unwinds will be rewritten into branches later.
+    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
+      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+           BB != E; ++BB)
+        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+          new CallInst(StackRestore, SavedPtr, "", UI);
+          ++NumStackRestores;
+        }
+    }
+    // NOTE(review): NumStackRestores is computed but never read again in this
+    // function -- it appears to be dead bookkeeping.
+  }
+
+  // If we are inlining tail call instruction through a call site that isn't 
+  // marked 'tail', we must remove the tail marker for any calls in the inlined
+  // code.
+  if (MustClearTailCallFlags && InlinedFunctionInfo.ContainsCalls) {
+    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
+         BB != E; ++BB)
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+        if (CallInst *CI = dyn_cast<CallInst>(I))
+          CI->setTailCall(false);
+  }
+
+  // If we are inlining for an invoke instruction, we must make sure to rewrite
+  // any inlined 'unwind' instructions into branches to the invoke exception
+  // destination, and call instructions into invoke instructions.
+  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+
+  // If we cloned in _exactly one_ basic block, and if that block ends in a
+  // return instruction, we splice the body of the inlined callee directly into
+  // the calling basic block.
+  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
+    // Move all of the instructions right before the call.
+    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
+                                 FirstNewBlock->begin(), FirstNewBlock->end());
+    // Remove the cloned basic block.
+    Caller->getBasicBlockList().pop_back();
+
+    // If the call site was an invoke instruction, add a branch to the normal
+    // destination.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
+      new BranchInst(II->getNormalDest(), TheCall);
+
+    // If the return instruction returned a value, replace uses of the call with
+    // uses of the returned value.
+    if (!TheCall->use_empty())
+      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+
+    // Since we are now done with the Call/Invoke, we can delete it.
+    TheCall->getParent()->getInstList().erase(TheCall);
+
+    // Since we are now done with the return instruction, delete it also.
+    Returns[0]->getParent()->getInstList().erase(Returns[0]);
+
+    // We are now done with the inlining.
+    return true;
+  }
+
+  // Otherwise, we have the normal case, of more than one block to inline or
+  // multiple return sites.
+
+  // We want to clone the entire callee function into the hole between the
+  // "starter" and "ender" blocks.  How we accomplish this depends on whether
+  // this is an invoke instruction or a call instruction.
+  BasicBlock *AfterCallBB;
+  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+
+    // Add an unconditional branch to make this look like the CallInst case...
+    BranchInst *NewBr = new BranchInst(II->getNormalDest(), TheCall);
+
+    // Split the basic block.  This guarantees that no PHI nodes will have to be
+    // updated due to new incoming edges, and make the invoke case more
+    // symmetric to the call case.
+    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+                                          CalledFunc->getName()+".exit");
+
+  } else {  // It's a call
+    // If this is a call instruction, we need to split the basic block that
+    // the call lives in.
+    //
+    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
+                                          CalledFunc->getName()+".exit");
+  }
+
+  // Change the branch that used to go to AfterCallBB to branch to the first
+  // basic block of the inlined function.
+  //
+  TerminatorInst *Br = OrigBB->getTerminator();
+  assert(Br && Br->getOpcode() == Instruction::Br &&
+         "splitBasicBlock broken!");
+  // Operand 0 of the unconditional branch created by splitBasicBlock is its
+  // destination; retarget it at the first inlined block.
+  Br->setOperand(0, FirstNewBlock);
+
+
+  // Now that the function is correct, make it a little bit nicer.  In
+  // particular, move the basic blocks inserted from the end of the function
+  // into the space made by splitting the source basic block.
+  //
+  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
+                                     FirstNewBlock, Caller->end());
+
+  // Handle all of the return instructions that we just cloned in, and eliminate
+  // any users of the original call/invoke instruction.
+  if (Returns.size() > 1) {
+    // The PHI node should go at the front of the new basic block to merge all
+    // possible incoming values.
+    //
+    PHINode *PHI = 0;
+    if (!TheCall->use_empty()) {
+      PHI = new PHINode(CalledFunc->getReturnType(),
+                        TheCall->getName(), AfterCallBB->begin());
+
+      // Anything that used the result of the function call should now use the
+      // PHI node as their operand.
+      //
+      TheCall->replaceAllUsesWith(PHI);
+    }
+
+    // Loop over all of the return instructions, turning them into unconditional
+    // branches to the merge point now, and adding entries to the PHI node as
+    // appropriate.
+    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+      ReturnInst *RI = Returns[i];
+
+      if (PHI) {
+        assert(RI->getReturnValue() && "Ret should have value!");
+        assert(RI->getReturnValue()->getType() == PHI->getType() &&
+               "Ret value not consistent in function!");
+        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
+      }
+
+      // Add a branch to the merge point where the PHI node lives if it exists.
+      new BranchInst(AfterCallBB, RI);
+
+      // Delete the return instruction now
+      RI->getParent()->getInstList().erase(RI);
+    }
+
+  } else if (!Returns.empty()) {
+    // Otherwise, if there is exactly one return value, just replace anything
+    // using the return value of the call with the computed value.
+    if (!TheCall->use_empty())
+      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
+
+    // Splice the code from the return block into the block that it will return
+    // to, which contains the code that was after the call.
+    BasicBlock *ReturnBB = Returns[0]->getParent();
+    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
+                                      ReturnBB->getInstList());
+
+    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+    ReturnBB->replaceAllUsesWith(AfterCallBB);
+
+    // Delete the return instruction now and empty ReturnBB now.
+    Returns[0]->eraseFromParent();
+    ReturnBB->eraseFromParent();
+  } else if (!TheCall->use_empty()) {
+    // No returns, but something is using the return value of the call.  Just
+    // nuke the result.
+    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
+  }
+
+  // Since we are now done with the Call/Invoke, we can delete it.
+  TheCall->eraseFromParent();
+
+  // We should always be able to fold the entry block of the function into the
+  // single predecessor of the block...
+  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
+  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);
+
+  // Splice the code entry block into calling block, right before the
+  // unconditional branch.
+  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
+  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
+
+  // Remove the unconditional branch.
+  OrigBB->getInstList().erase(Br);
+
+  // Now we can remove the CalleeEntry block, which is now empty.
+  Caller->getBasicBlockList().erase(CalleeEntry);
+  
+  return true;
+}
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
new file mode 100644
index 0000000..220241d
--- /dev/null
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -0,0 +1,269 @@
+//===-- LCSSA.cpp - Convert loops into loop-closed SSA form ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Owen Anderson and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass transforms loops by placing phi nodes at the end of the loops for
+// all values that are live across the loop boundary.  For example, it turns
+// the left into the right code:
+// 
+// for (...)                for (...)
+//   if (c)                   if (c)
+//     X1 = ...                 X1 = ...
+//   else                     else
+//     X2 = ...                 X2 = ...
+//   X3 = phi(X1, X2)         X3 = phi(X1, X2)
+// ... = X3 + 4              X4 = phi(X3)
+//                           ... = X4 + 4
+//
+// This is still valid LLVM; the extra phi nodes are purely redundant, and will
+// be trivially eliminated by InstCombine.  The major benefit of this 
+// transformation is that it makes many other loop optimizations, such as 
+// LoopUnswitching, simpler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lcssa"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+
+namespace {
+  /// LCSSA - Loop pass that rewrites loops into loop-closed SSA form by
+  /// inserting phi nodes in loop exit blocks for every value that is live
+  /// across the loop boundary.
+  struct VISIBILITY_HIDDEN LCSSA : public LoopPass {
+    static char ID; // Pass identification, replacement for typeid
+    LCSSA() : LoopPass((intptr_t)&ID) {}
+
+    // Cached analysis information for the current function.
+    LoopInfo *LI;
+    DominatorTree *DT;
+    // Sorted list of the current loop's blocks; kept sorted so that inLoop()
+    // below can use a binary search.
+    std::vector<BasicBlock*> LoopBlocks;
+    
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+    void ProcessInstruction(Instruction* Instr,
+                            const std::vector<BasicBlock*>& exitBlocks);
+    
+    /// This transformation requires natural loop information & requires that
+    /// loop preheaders be inserted into the CFG.  It maintains both of these,
+    /// as well as the CFG.  It also requires dominator information.
+    ///
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequiredID(LoopSimplifyID);
+      AU.addPreservedID(LoopSimplifyID);
+      AU.addRequired<LoopInfo>();
+      AU.addPreserved<LoopInfo>();
+      AU.addRequired<DominatorTree>();
+      AU.addPreserved<ScalarEvolution>();
+    }
+  private:
+    void getLoopValuesUsedOutsideLoop(Loop *L,
+                                      SetVector<Instruction*> &AffectedValues);
+
+    Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
+                            std::map<DomTreeNode*, Value*> &Phis);
+
+    /// inLoop - returns true if the given block is within the current loop.
+    /// (A top-level 'const' on a by-value return type is meaningless and has
+    /// been dropped.)
+    bool inLoop(BasicBlock* B) {
+      return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
+    }
+  };
+  
+  char LCSSA::ID = 0;
+  RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
+}
+
+// createLCSSAPass - Public factory used to add the LCSSA pass to a pipeline.
+LoopPass *llvm::createLCSSAPass() { return new LCSSA(); }
+// LCSSAID - Exposed PassInfo so other passes can name LCSSA as a dependency.
+const PassInfo *llvm::LCSSAID = X.getPassInfo();
+
+/// runOnLoop - Transform a single loop (the loop pass manager visits loops
+/// inner-most out) into loop-closed SSA form.  Returns true iff any phi
+/// nodes were inserted.
+bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) {
+  
+  LI = &LPM.getAnalysis<LoopInfo>();
+  DT = &getAnalysis<DominatorTree>();
+    
+  // Speed up queries by creating a sorted list of blocks; inLoop() relies on
+  // this ordering for its binary search.
+  LoopBlocks.clear();
+  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
+  std::sort(LoopBlocks.begin(), LoopBlocks.end());
+  
+  SetVector<Instruction*> AffectedValues;
+  getLoopValuesUsedOutsideLoop(L, AffectedValues);
+  
+  // If no values are affected, we can save a lot of work, since we know that
+  // nothing will be changed.
+  if (AffectedValues.empty())
+    return false;
+  
+  std::vector<BasicBlock*> exitBlocks;
+  L->getExitBlocks(exitBlocks);
+  
+  
+  // Iterate over all affected values for this loop and insert Phi nodes
+  // for them in the appropriate exit blocks
+  
+  for (SetVector<Instruction*>::iterator I = AffectedValues.begin(),
+       E = AffectedValues.end(); I != E; ++I)
+    ProcessInstruction(*I, exitBlocks);
+  
+  // Sanity-check that the rewrite actually produced loop-closed SSA form.
+  assert(L->isLCSSAForm());
+  
+  return true;
+}
+
+/// ProcessInstruction - Given a live-out instruction, insert LCSSA Phi nodes,
+/// eliminate all out-of-loop uses.
+void LCSSA::ProcessInstruction(Instruction *Instr,
+                               const std::vector<BasicBlock*>& exitBlocks) {
+  ++NumLCSSA; // We are applying the transformation
+
+  // Keep track of the blocks that have the value available already.
+  std::map<DomTreeNode*, Value*> Phis;
+
+  DomTreeNode *InstrNode = DT->getNode(Instr->getParent());
+
+  // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
+  // add them to the Phi's map.  Exit blocks not dominated by the definition
+  // are skipped; GetValueForBlock fills those in on demand.
+  for (std::vector<BasicBlock*>::const_iterator BBI = exitBlocks.begin(),
+      BBE = exitBlocks.end(); BBI != BBE; ++BBI) {
+    BasicBlock *BB = *BBI;
+    DomTreeNode *ExitBBNode = DT->getNode(BB);
+    Value *&Phi = Phis[ExitBBNode];
+    if (!Phi && DT->dominates(InstrNode, ExitBBNode)) {
+      PHINode *PN = new PHINode(Instr->getType(), Instr->getName()+".lcssa",
+                                BB->begin());
+      PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB)));
+
+      // Remember that this phi makes the value alive in this block.
+      Phi = PN;
+
+      // Add inputs from inside the loop for this PHI.  Every predecessor of
+      // an exit block is inside the loop, so the original value is used.
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+        PN->addIncoming(Instr, *PI);
+    }
+  }
+  
+  
+  // Record all uses of Instr outside the loop.  We need to rewrite these.  The
+  // LCSSA phis won't be included because they use the value in the loop.
+  for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end();
+       UI != E;) {
+    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+    if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+      // For a PHI user, the use happens on the incoming edge, not in the
+      // PHI's own block.  OperandNo/2 assumes PHI operands are stored as
+      // (value, block) pairs -- TODO confirm against the PHINode layout.
+      unsigned OperandNo = UI.getOperandNo();
+      UserBB = P->getIncomingBlock(OperandNo/2);
+    }
+    
+    // If the user is in the loop, don't rewrite it!
+    if (UserBB == Instr->getParent() || inLoop(UserBB)) {
+      ++UI;
+      continue;
+    }
+    
+    // Otherwise, patch up uses of the value with the appropriate LCSSA Phi,
+    // inserting PHI nodes into join points where needed.
+    Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis);
+    
+    // Preincrement the iterator to avoid invalidating it when we change the
+    // value.
+    Use &U = UI.getUse();
+    ++UI;
+    U.set(Val);
+  }
+}
+
+/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that
+/// are used by instructions outside of it.
+void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L,
+                                      SetVector<Instruction*> &AffectedValues) {
+  // FIXME: For large loops, we may be able to avoid a lot of use-scanning
+  // by using dominance information.  In particular, if a block does not
+  // dominate any of the loop exits, then none of the values defined in the
+  // block could be used outside the loop.
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB) {
+    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I)
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+           ++UI) {
+        BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+        if (PHINode* p = dyn_cast<PHINode>(*UI)) {
+          // A PHI use is effectively located in the incoming block, so treat
+          // it as a use from that edge rather than from the PHI's block.
+          unsigned OperandNo = UI.getOperandNo();
+          UserBB = p->getIncomingBlock(OperandNo/2);
+        }
+        
+        // One out-of-loop use is enough to mark the value; stop scanning its
+        // remaining uses.
+        if (*BB != UserBB && !inLoop(UserBB)) {
+          AffectedValues.insert(I);
+          break;
+        }
+      }
+  }
+}
+
+/// GetValueForBlock - Get the value to use within the specified basic block.
+/// available values are in Phis.  Results are memoized in Phis, so each
+/// dominator-tree node is computed at most once per live-out instruction.
+Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
+                               std::map<DomTreeNode*, Value*> &Phis) {
+  // If there is no dominator info for this BB, it is unreachable.
+  if (BB == 0)
+    return UndefValue::get(OrigInst->getType());
+                                 
+  // If we have already computed this value, return the previously computed val.
+  Value *&V = Phis[BB];
+  if (V) return V;
+
+  DomTreeNode *IDom = BB->getIDom();
+
+  // If the block has no dominator, bail
+  if (!IDom)
+    return V = UndefValue::get(OrigInst->getType());
+
+  // Otherwise, there are two cases: we either have to insert a PHI node or we
+  // don't.  We need to insert a PHI node if this block is not dominated by one
+  // of the exit nodes from the loop (the loop could have multiple exits, and
+  // though the value defined *inside* the loop dominated all its uses, each
+  // exit by itself may not dominate all the uses).
+  //
+  // The simplest way to check for this condition is by checking to see if the
+  // idom is in the loop.  If so, we *know* that none of the exit blocks
+  // dominate this block.  Note that we *know* that the block defining the
+  // original instruction is in the idom chain, because if it weren't, then the
+  // original value didn't dominate this use.
+  if (!inLoop(IDom->getBlock())) {
+    // Idom is not in the loop, we must still be "below" the exit block and must
+    // be fully dominated by the value live in the idom.
+    return V = GetValueForBlock(IDom, OrigInst, Phis);
+  }
+  
+  BasicBlock *BBN = BB->getBlock();
+  
+  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
+  // now, then get values to fill in the incoming values for the PHI.
+  // V is set *before* recursing so that a cycle back to this block finds the
+  // PHI instead of recursing forever.
+  PHINode *PN = new PHINode(OrigInst->getType(), OrigInst->getName()+".lcssa",
+                            BBN->begin());
+  PN->reserveOperandSpace(std::distance(pred_begin(BBN), pred_end(BBN)));
+  V = PN;
+                                 
+  // Fill in the incoming values for the block.
+  for (pred_iterator PI = pred_begin(BBN), E = pred_end(BBN); PI != E; ++PI)
+    PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
+  return PN;
+}
+
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
new file mode 100644
index 0000000..5e2d237
--- /dev/null
+++ b/lib/Transforms/Utils/Local.cpp
@@ -0,0 +1,200 @@
+//===-- Local.cpp - Functions to perform local transformations ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions perform various local transformations to the
+// program.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include <cerrno>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Local constant propagation...
+//
+
+/// doConstantPropagation - Attempt to constant-fold the instruction at *II.
+/// On success, all uses are rewritten to the folded constant, the instruction
+/// is erased, II is advanced past it, and true is returned; otherwise the
+/// iterator is untouched and false is returned.
+///
+bool llvm::doConstantPropagation(BasicBlock::iterator &II,
+                                 const TargetData *TD) {
+  Constant *Folded = ConstantFoldInstruction(II, TD);
+  if (!Folded)
+    return false;
+
+  // Rewrite every use of the instruction to the folded constant.
+  II->replaceAllUsesWith(Folded);
+
+  // Erase the now-dead instruction, leaving II at its successor.
+  II = II->getParent()->getInstList().erase(II);
+  return true;
+}
+
+// ConstantFoldTerminator - If a terminator instruction is predicated on a
+// constant value, convert it into an unconditional branch to the constant
+// destination.  Returns true if the terminator was changed.
+//
+bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
+  TerminatorInst *T = BB->getTerminator();
+
+  // Branch - See if we are conditional jumping on constant
+  if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
+    if (BI->isUnconditional()) return false;  // Can't optimize uncond branch
+    BasicBlock *Dest1 = cast<BasicBlock>(BI->getOperand(0));
+    BasicBlock *Dest2 = cast<BasicBlock>(BI->getOperand(1));
+
+    if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+      // Are we branching on constant?
+      // YES.  Change to unconditional branch...
+      BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;
+      BasicBlock *OldDest     = Cond->getZExtValue() ? Dest2 : Dest1;
+
+      //cerr << "Function: " << T->getParent()->getParent()
+      //     << "\nRemoving branch from " << T->getParent()
+      //     << "\n\nTo: " << OldDest << endl;
+
+      // Let the basic block know that we are letting go of it.  Based on this,
+      // it will adjust its PHI nodes.
+      assert(BI->getParent() && "Terminator not inserted in block!");
+      OldDest->removePredecessor(BI->getParent());
+
+      // Set the unconditional destination, and change the insn to be an
+      // unconditional branch.
+      BI->setUnconditionalDest(Destination);
+      return true;
+    } else if (Dest2 == Dest1) {       // Conditional branch to same location?
+      // This branch matches something like this:
+      //     br bool %cond, label %Dest, label %Dest
+      // and changes it into:  br label %Dest
+
+      // Let the basic block know that we are letting go of one copy of it.
+      assert(BI->getParent() && "Terminator not inserted in block!");
+      Dest1->removePredecessor(BI->getParent());
+
+      // Change a conditional branch to unconditional.
+      BI->setUnconditionalDest(Dest1);
+      return true;
+    }
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+    // If we are switching on a constant, we can convert the switch into a
+    // single branch instruction!
+    // Note: CI may be null when the switch condition is not a constant; the
+    // case-value comparisons below then simply never match, and only the
+    // "all successors identical" folding can fire.
+    ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
+    BasicBlock *TheOnlyDest = SI->getSuccessor(0);  // The default dest
+    BasicBlock *DefaultDest = TheOnlyDest;
+    assert(TheOnlyDest == SI->getDefaultDest() &&
+           "Default destination is not successor #0?");
+
+    // Figure out which case it goes to...
+    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+      // Found case matching a constant operand?
+      if (SI->getSuccessorValue(i) == CI) {
+        TheOnlyDest = SI->getSuccessor(i);
+        break;
+      }
+
+      // Check to see if this branch is going to the same place as the default
+      // dest.  If so, eliminate it as an explicit compare.
+      if (SI->getSuccessor(i) == DefaultDest) {
+        // Remove this entry...
+        DefaultDest->removePredecessor(SI->getParent());
+        SI->removeCase(i);
+        --i; --e;  // Don't skip an entry...
+        continue;
+      }
+
+      // Otherwise, check to see if the switch only branches to one destination.
+      // We do this by resetting "TheOnlyDest" to null when we find two
+      // non-equal destinations.
+      if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+    }
+
+    if (CI && !TheOnlyDest) {
+      // Branching on a constant, but not any of the cases, go to the default
+      // successor.
+      TheOnlyDest = SI->getDefaultDest();
+    }
+
+    // If we found a single destination that we can fold the switch into, do so
+    // now.
+    if (TheOnlyDest) {
+      // Insert the new branch..
+      new BranchInst(TheOnlyDest, SI);
+      BasicBlock *BB = SI->getParent();
+
+      // Remove entries from PHI nodes which we no longer branch to...
+      for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+        // Found case matching a constant operand?
+        BasicBlock *Succ = SI->getSuccessor(i);
+        if (Succ == TheOnlyDest)
+          TheOnlyDest = 0;  // Don't modify the first branch to TheOnlyDest
+        else
+          Succ->removePredecessor(BB);
+      }
+
+      // Delete the old switch...
+      BB->getInstList().erase(SI);
+      return true;
+    } else if (SI->getNumSuccessors() == 2) {
+      // Otherwise, we can fold this switch into a conditional branch
+      // instruction if it has only one non-default destination.
+      Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
+                                 SI->getSuccessorValue(1), "cond", SI);
+      // Insert the new branch...
+      new BranchInst(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
+
+      // Delete the old switch...
+      SI->getParent()->getInstList().erase(SI);
+      return true;
+    }
+  }
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Local dead code elimination...
+//
+
+// isInstructionTriviallyDead - Return true if the instruction has no uses and
+// no observable side effects, i.e. it can be removed without changing the
+// program's behavior.
+bool llvm::isInstructionTriviallyDead(Instruction *I) {
+  // Anything still used, or any terminator, is never trivially dead.
+  if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+
+  // Instructions that don't write memory have no side effects to preserve.
+  if (!I->mayWriteToMemory()) return true;
+
+  // Calls to known intrinsics may still be removable.  The included generated
+  // code (presumably a switch on IntrinsicID returning true for
+  // side-effect-free intrinsics -- the expansion is not visible here) makes
+  // the per-intrinsic decision.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction()) {
+      unsigned IntrinsicID = F->getIntrinsicID();
+#define GET_SIDE_EFFECT_INFO
+#include "llvm/Intrinsics.gen"
+#undef GET_SIDE_EFFECT_INFO
+    }
+  return false;
+}
+
+// dceInstruction - Inspect the instruction at *BBI and figure out if it's
+// [trivially] dead.  If so, remove the instruction and update the iterator
+// to point to the instruction that immediately succeeded the original
+// instruction.  Returns true if the instruction was removed.
+//
+bool llvm::dceInstruction(BasicBlock::iterator &BBI) {
+  // Leave live (or side-effecting) instructions alone.
+  if (!isInstructionTriviallyDead(BBI))
+    return false;
+
+  // Erase the dead instruction; erase() hands back the next position.
+  BBI = BBI->getParent()->getInstList().erase(BBI);
+  return true;
+}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 0000000..0a5de2b
--- /dev/null
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,692 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header.  This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header).  This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsimplify"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constant.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+using namespace llvm;
+
+STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
+STATISTIC(NumNested  , "Number of nested loops split out");
+
+namespace {
+  struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    LoopSimplify() : FunctionPass((intptr_t)&ID) {}
+
+    // AA - If we have an alias analysis object to update, this is it, otherwise
+    // this is null.
+    AliasAnalysis *AA;
+    LoopInfo *LI;
+    DominatorTree *DT;
+    virtual bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // We need loop information to identify the loops...
+      AU.addRequired<LoopInfo>();
+      AU.addRequired<DominatorTree>();
+
+      AU.addPreserved<LoopInfo>();
+      AU.addPreserved<DominatorTree>();
+      AU.addPreserved<DominanceFrontier>();
+      AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
+    }
+  private:
+    bool ProcessLoop(Loop *L);
+    BasicBlock *SplitBlockPredecessors(BasicBlock *BB, const char *Suffix,
+                                       const std::vector<BasicBlock*> &Preds);
+    BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
+    void InsertPreheaderForLoop(Loop *L);
+    Loop *SeparateNestedLoop(Loop *L);
+    void InsertUniqueBackedgeBlock(Loop *L);
+    void PlaceSplitBlockCarefully(BasicBlock *NewBB,
+                                  std::vector<BasicBlock*> &SplitPreds,
+                                  Loop *L);
+  };
+
+  char LoopSimplify::ID = 0;
+  RegisterPass<LoopSimplify>
+  X("loopsimplify", "Canonicalize natural loops", true);
+}
+
+// Publicly exposed interface to pass...
+const PassInfo *llvm::LoopSimplifyID = X.getPassInfo();
+FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+  bool Changed = false;
+  LI = &getAnalysis<LoopInfo>();
+  AA = getAnalysisToUpdate<AliasAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
+
+  // Check to see that no blocks (other than the header) in loops have
+  // predecessors that are not in loops.  This is not valid for natural loops,
+  // but can occur if the blocks are unreachable.  Since they are unreachable we
+  // can just shamelessly destroy their terminators to make them not branch into
+  // the loop!
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    // This case can only occur for unreachable blocks.  Blocks that are
+    // unreachable can't be in loops, so filter those blocks out.
+    if (LI->getLoopFor(BB)) continue;
+    
+    bool BlockUnreachable = false;
+    TerminatorInst *TI = BB->getTerminator();
+
+    // Check to see if any successors of this block are non-loop-header loops
+    // that are not the header.
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+      // If this successor is not in a loop, BB is clearly ok.
+      Loop *L = LI->getLoopFor(TI->getSuccessor(i));
+      if (!L) continue;
+      
+      // If the succ is the loop header, and if L is a top-level loop, then this
+      // is an entrance into a loop through the header, which is also ok.
+      if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0)
+        continue;
+      
+      // Otherwise, this is an entrance into a loop from some place invalid.
+      // Either the loop structure is invalid and this is not a natural loop (in
+      // which case the compiler is buggy somewhere else) or BB is unreachable.
+      BlockUnreachable = true;
+      break;
+    }
+    
+    // If this block is ok, check the next one.
+    if (!BlockUnreachable) continue;
+    
+    // Otherwise, this block is dead.  To clean up the CFG and to allow later
+    // loop transformations to ignore this case, we delete the edges into the
+    // loop by replacing the terminator.
+    
+    // Remove PHI entries from the successors.
+    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+      TI->getSuccessor(i)->removePredecessor(BB);
+   
+    // Add a new unreachable instruction.
+    new UnreachableInst(TI);
+    
+    // Delete the dead terminator.
+    if (AA) AA->deleteValue(&BB->back());
+    BB->getInstList().pop_back();
+    Changed |= true;
+  }
+  
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    Changed |= ProcessLoop(*I);
+
+  return Changed;
+}
+
+/// ProcessLoop - Walk the loop structure in depth first order, ensuring that
+/// all loops have preheaders.
+///
+bool LoopSimplify::ProcessLoop(Loop *L) {
+  bool Changed = false;
+ReprocessLoop:
+  
+  // Canonicalize inner loops before outer loops.  Inner loop canonicalization
+  // can provide work for the outer loop to canonicalize.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    Changed |= ProcessLoop(*I);
+  
+  assert(L->getBlocks()[0] == L->getHeader() &&
+         "Header isn't first block in loop?");
+
+  // Does the loop already have a preheader?  If so, don't insert one.
+  if (L->getLoopPreheader() == 0) {
+    InsertPreheaderForLoop(L);
+    NumInserted++;
+    Changed = true;
+  }
+
+  // Next, check to make sure that all exit nodes of the loop only have
+  // predecessors that are inside of the loop.  This check guarantees that the
+  // loop preheader/header will dominate the exit blocks.  If the exit block has
+  // predecessors from outside of the loop, split the edge now.
+  std::vector<BasicBlock*> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+    
+  SetVector<BasicBlock*> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
+  for (SetVector<BasicBlock*>::iterator I = ExitBlockSet.begin(),
+         E = ExitBlockSet.end(); I != E; ++I) {
+    BasicBlock *ExitBlock = *I;
+    for (pred_iterator PI = pred_begin(ExitBlock), PE = pred_end(ExitBlock);
+         PI != PE; ++PI)
+      // Must be exactly this loop: no subloops, parent loops, or non-loop preds
+      // allowed.
+      if (!L->contains(*PI)) {
+        RewriteLoopExitBlock(L, ExitBlock);
+        NumInserted++;
+        Changed = true;
+        break;
+      }
+  }
+
+  // If the header has more than two predecessors at this point (from the
+  // preheader and from multiple backedges), we must adjust the loop.
+  unsigned NumBackedges = L->getNumBackEdges();
+  if (NumBackedges != 1) {
+    // If this is really a nested loop, rip it out into a child loop.  Don't do
+    // this for loops with a giant number of backedges, just factor them into a
+    // common backedge instead.
+    if (NumBackedges < 8) {
+      if (Loop *NL = SeparateNestedLoop(L)) {
+        ++NumNested;
+        // This is a big restructuring change, reprocess the whole loop.
+        ProcessLoop(NL);
+        Changed = true;
+        // GCC doesn't eliminate this tail recursion, so use an explicit goto.
+        goto ReprocessLoop;
+      }
+    }
+
+    // If we either couldn't, or didn't want to, identify nesting of the loops,
+    // insert a new block that all backedges target, then make it jump to the
+    // loop header.
+    InsertUniqueBackedgeBlock(L);
+    NumInserted++;
+    Changed = true;
+  }
+
+  // Scan over the PHI nodes in the loop header.  Since they now have only two
+  // incoming values (the loop is canonicalized), we may have simplified the PHI
+  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+  PHINode *PN;
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       (PN = dyn_cast<PHINode>(I++)); )
+    if (Value *V = PN->hasConstantValue()) {
+        PN->replaceAllUsesWith(V);
+        PN->eraseFromParent();
+      }
+
+  return Changed;
+}
+
+/// SplitBlockPredecessors - Split the specified block into two blocks.  We want
+/// to move the predecessors specified in the Preds list to point to the new
+/// block, leaving the remaining predecessors pointing to BB.  This method
+/// updates the SSA PHINode's, but no other analyses.
+///
+BasicBlock *LoopSimplify::SplitBlockPredecessors(BasicBlock *BB,
+                                                 const char *Suffix,
+                                       const std::vector<BasicBlock*> &Preds) {
+
+  // Create new basic block, insert right before the original block...
+  BasicBlock *NewBB = new BasicBlock(BB->getName()+Suffix, BB->getParent(), BB);
+
+  // The preheader first gets an unconditional branch to the loop header...
+  BranchInst *BI = new BranchInst(BB, NewBB);
+
+  // For every PHI node in the block, insert a PHI node into NewBB where the
+  // incoming values from the out of loop edges are moved to NewBB.  We have two
+  // possible cases here.  If the loop is dead, we just insert dummy entries
+  // into the PHI nodes for the new edge.  If the loop is not dead, we move the
+  // incoming edges in BB into new PHI nodes in NewBB.
+  //
+  if (!Preds.empty()) {  // Is the loop not obviously dead?
+    // Check to see if the values being merged into the new block need PHI
+    // nodes.  If so, insert them.
+    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
+      PHINode *PN = cast<PHINode>(I);
+      ++I;
+
+      // Check to see if all of the values coming in are the same.  If so, we
+      // don't need to create a new PHI node.
+      Value *InVal = PN->getIncomingValueForBlock(Preds[0]);
+      for (unsigned i = 1, e = Preds.size(); i != e; ++i)
+        if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
+          InVal = 0;
+          break;
+        }
+
+      // If the values coming into the block are not the same, we need a PHI.
+      if (InVal == 0) {
+        // Create the new PHI node, insert it into NewBB at the end of the block
+        PHINode *NewPHI = new PHINode(PN->getType(), PN->getName()+".ph", BI);
+        if (AA) AA->copyValue(PN, NewPHI);
+
+        // Move all of the edges from blocks outside the loop to the new PHI
+        for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+          Value *V = PN->removeIncomingValue(Preds[i], false);
+          NewPHI->addIncoming(V, Preds[i]);
+        }
+        InVal = NewPHI;
+      } else {
+        // Remove all of the edges coming into the PHI nodes from outside of the
+        // block.
+        for (unsigned i = 0, e = Preds.size(); i != e; ++i)
+          PN->removeIncomingValue(Preds[i], false);
+      }
+
+      // Add an incoming value to the PHI node in the loop for the preheader
+      // edge.
+      PN->addIncoming(InVal, NewBB);
+
+      // Can we eliminate this phi node now?
+      if (Value *V = PN->hasConstantValue(true)) {
+        Instruction *I = dyn_cast<Instruction>(V);
+        // If I is in NewBB, the Dominator call will fail, because NewBB isn't
+        // registered in DominatorTree yet.  Handle this case explicitly.
+        if (!I || (I->getParent() != NewBB &&
+                   getAnalysis<DominatorTree>().dominates(I, PN))) {
+          PN->replaceAllUsesWith(V);
+          if (AA) AA->deleteValue(PN);
+          BB->getInstList().erase(PN);
+        }
+      }
+    }
+
+    // Now that the PHI nodes are updated, actually move the edges from
+    // Preds to point to NewBB instead of BB.
+    //
+    for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+      TerminatorInst *TI = Preds[i]->getTerminator();
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s)
+        if (TI->getSuccessor(s) == BB)
+          TI->setSuccessor(s, NewBB);
+    }
+
+  } else {                       // Otherwise the loop is dead...
+    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) {
+      PHINode *PN = cast<PHINode>(I);
+      // Insert dummy values as the incoming value...
+      PN->addIncoming(Constant::getNullValue(PN->getType()), NewBB);
+    }
+  }
+
+  return NewBB;
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one.  This method has two phases:
+/// preheader insertion and analysis updating.
+///
+void LoopSimplify::InsertPreheaderForLoop(Loop *L) {
+  BasicBlock *Header = L->getHeader();
+
+  // Compute the set of predecessors of the loop that are not in the loop.
+  std::vector<BasicBlock*> OutsideBlocks;
+  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+       PI != PE; ++PI)
+    if (!L->contains(*PI))           // Coming in from outside the loop?
+      OutsideBlocks.push_back(*PI);  // Keep track of it...
+
+  // Split out the loop pre-header.
+  BasicBlock *NewBB =
+    SplitBlockPredecessors(Header, ".preheader", OutsideBlocks);
+  
+
+  //===--------------------------------------------------------------------===//
+  //  Update analysis results now that we have performed the transformation
+  //
+
+  // We know that we have loop information to update... update it now.
+  if (Loop *Parent = L->getParentLoop())
+    Parent->addBasicBlockToLoop(NewBB, *LI);
+
+  DT->splitBlock(NewBB);
+  if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>())
+    DF->splitBlock(NewBB);
+
+  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+  // code layout too horribly.
+  PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+}
+
+/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
+/// blocks.  This method is used to split exit blocks that have predecessors
+/// outside of the loop.
+BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
+  std::vector<BasicBlock*> LoopBlocks;
+  for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I)
+    if (L->contains(*I))
+      LoopBlocks.push_back(*I);
+
+  assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
+  BasicBlock *NewBB = SplitBlockPredecessors(Exit, ".loopexit", LoopBlocks);
+
+  // Update Loop Information - we know that the new block will be in whichever
+  // loop the Exit block is in.  Note that it may not be in that immediate loop,
+  // if the successor is some other loop header.  In that case, we continue 
+  // walking up the loop tree to find a loop that contains both the successor
+  // block and the predecessor block.
+  Loop *SuccLoop = LI->getLoopFor(Exit);
+  while (SuccLoop && !SuccLoop->contains(L->getHeader()))
+    SuccLoop = SuccLoop->getParentLoop();
+  if (SuccLoop)
+    SuccLoop->addBasicBlockToLoop(NewBB, *LI);
+
+  // Update Dominator Information
+  DT->splitBlock(NewBB);
+  if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>())
+    DF->splitBlock(NewBB);
+
+  return NewBB;
+}
+
+/// AddBlockAndPredsToSet - Add the specified block, and all of its
+/// predecessors, to the specified set, if it's not already in there.  Stop
+/// predecessor traversal when we reach StopBlock.
+static void AddBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+                                  std::set<BasicBlock*> &Blocks) {
+  std::vector<BasicBlock *> WorkList;
+  WorkList.push_back(InputBB);
+  do {
+    BasicBlock *BB = WorkList.back(); WorkList.pop_back();
+    if (Blocks.insert(BB).second && BB != StopBlock)
+      // If BB is not already processed and it is not a stop block then
+      // insert its predecessors into the work list
+      for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+        BasicBlock *WBB = *I;
+        WorkList.push_back(WBB);
+      }
+  } while(!WorkList.empty());
+}
+
+/// FindPHIToPartitionLoops - The first part of loop-nestification is to find a
+/// PHI node that tells us how to partition the loops.
+static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+                                        AliasAnalysis *AA) {
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+    PHINode *PN = cast<PHINode>(I);
+    ++I;
+    if (Value *V = PN->hasConstantValue())
+      if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) {
+        // This is a degenerate PHI already, don't modify it!
+        PN->replaceAllUsesWith(V);
+        if (AA) AA->deleteValue(PN);
+        PN->eraseFromParent();
+        continue;
+      }
+
+    // Scan this PHI node looking for a use of the PHI node by itself.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == PN &&
+          L->contains(PN->getIncomingBlock(i)))
+        // We found something tasty to remove.
+        return PN;
+  }
+  return 0;
+}
+
+// PlaceSplitBlockCarefully - If the new block isn't already well placed, move
+// it to right after some 'outside block'.  This prevents the preheader from
+// being placed inside the loop body, e.g. when the loop hasn't been rotated.
+void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
+                                            std::vector<BasicBlock*>&SplitPreds,
+                                            Loop *L) {
+  // Check to see if NewBB is already well placed.
+  Function::iterator BBI = NewBB; --BBI;
+  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+    if (&*BBI == SplitPreds[i])
+      return;
+  }
+  
+  // If it isn't already after an outside block, move it after one.  This is
+  // always good as it makes the uncond branch from the outside block into a
+  // fall-through.
+  
+  // Figure out *which* outside block to put this after.  Prefer an outside
+  // block that neighbors a BB actually in the loop.
+  BasicBlock *FoundBB = 0;
+  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+    Function::iterator BBI = SplitPreds[i];
+    if (++BBI != NewBB->getParent()->end() && 
+        L->contains(BBI)) {
+      FoundBB = SplitPreds[i];
+      break;
+    }
+  }
+  
+  // If our heuristic for a *good* bb to place this after doesn't find
+  // anything, just pick something.  It's likely better than leaving it within
+  // the loop.
+  if (!FoundBB)
+    FoundBB = SplitPreds[0];
+  NewBB->moveAfter(FoundBB);
+}
+
+
+/// SeparateNestedLoop - If this loop has multiple backedges, try to pull one of
+/// them out into a nested loop.  This is important for code that looks like
+/// this:
+///
+///  Loop:
+///     ...
+///     br cond, Loop, Next
+///     ...
+///     br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop.  PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {
+  PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);
+  if (PN == 0) return 0;  // No known way to partition.
+
+  // Pull out all predecessors that have varying values in the loop.  This
+  // handles the case when a PHI node has multiple instances of itself as
+  // arguments.
+  std::vector<BasicBlock*> OuterLoopPreds;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+    if (PN->getIncomingValue(i) != PN ||
+        !L->contains(PN->getIncomingBlock(i)))
+      OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *NewBB = SplitBlockPredecessors(Header, ".outer", OuterLoopPreds);
+
+  // Update dominator information
+  DT->splitBlock(NewBB);
+  if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>())
+    DF->splitBlock(NewBB);
+
+  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+  // code layout too horribly.
+  PlaceSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+  
+  // Create the new outer loop.
+  Loop *NewOuter = new Loop();
+
+  // Change the parent loop to use the outer loop as its child now.
+  if (Loop *Parent = L->getParentLoop())
+    Parent->replaceChildLoopWith(L, NewOuter);
+  else
+    LI->changeTopLevelLoop(L, NewOuter);
+
+  // This block is going to be our new header block: add it to this loop and all
+  // parent loops.
+  NewOuter->addBasicBlockToLoop(NewBB, *LI);
+
+  // L is now a subloop of our outer loop.
+  NewOuter->addChildLoop(L);
+
+  for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i)
+    NewOuter->addBlockEntry(L->getBlocks()[i]);
+
+  // Determine which blocks should stay in L and which should be moved out to
+  // the Outer loop now.
+  std::set<BasicBlock*> BlocksInL;
+  for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI)
+    if (DT->dominates(Header, *PI))
+      AddBlockAndPredsToSet(*PI, Header, BlocksInL);
+
+
+  // Scan all of the loop children of L, moving them to OuterLoop if they are
+  // not part of the inner loop.
+  const std::vector<Loop*> &SubLoops = L->getSubLoops();
+  for (size_t I = 0; I != SubLoops.size(); )
+    if (BlocksInL.count(SubLoops[I]->getHeader()))
+      ++I;   // Loop remains in L
+    else
+      NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+  // Now that we know which blocks are in L and which need to be moved to
+  // OuterLoop, move any blocks that need it.
+  for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+    BasicBlock *BB = L->getBlocks()[i];
+    if (!BlocksInL.count(BB)) {
+      // Move this block to the parent, updating the exit blocks sets
+      L->removeBlockFromLoop(BB);
+      if ((*LI)[BB] == L)
+        LI->changeLoopFor(BB, NewOuter);
+      --i;
+    }
+  }
+
+  return NewOuter;
+}
+
+
+
+/// InsertUniqueBackedgeBlock - This method is called when the specified loop
+/// has more than one backedge in it.  If this occurs, revector all of these
+/// backedges to target a new basic block and have that block branch to the loop
+/// header.  This ensures that loops have exactly one backedge.
+///
+void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {
+  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+  // Get information about the loop
+  BasicBlock *Preheader = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  Function *F = Header->getParent();
+
+  // Figure out which basic blocks contain back-edges to the loop header.
+  std::vector<BasicBlock*> BackedgeBlocks;
+  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I)
+    if (*I != Preheader) BackedgeBlocks.push_back(*I);
+
+  // Create and insert the new backedge block...
+  BasicBlock *BEBlock = new BasicBlock(Header->getName()+".backedge", F);
+  BranchInst *BETerminator = new BranchInst(Header, BEBlock);
+
+  // Move the new backedge block to right after the last backedge block.
+  Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
+  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+  // Now that the block has been inserted into the function, create PHI nodes in
+  // the backedge block which correspond to any PHI nodes in the header block.
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    PHINode *NewPN = new PHINode(PN->getType(), PN->getName()+".be",
+                                 BETerminator);
+    NewPN->reserveOperandSpace(BackedgeBlocks.size());
+    if (AA) AA->copyValue(PN, NewPN);
+
+    // Loop over the PHI node, moving all entries except the one for the
+    // preheader over to the new PHI node.
+    unsigned PreheaderIdx = ~0U;
+    bool HasUniqueIncomingValue = true;
+    Value *UniqueValue = 0;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *IBB = PN->getIncomingBlock(i);
+      Value *IV = PN->getIncomingValue(i);
+      if (IBB == Preheader) {
+        PreheaderIdx = i;
+      } else {
+        NewPN->addIncoming(IV, IBB);
+        if (HasUniqueIncomingValue) {
+          if (UniqueValue == 0)
+            UniqueValue = IV;
+          else if (UniqueValue != IV)
+            HasUniqueIncomingValue = false;
+        }
+      }
+    }
+
+    // Delete all of the incoming values from the old PN except the preheader's
+    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+    if (PreheaderIdx != 0) {
+      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+    }
+    // Nuke all entries except the zero'th.
+    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+      PN->removeIncomingValue(e-i, false);
+
+    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+    PN->addIncoming(NewPN, BEBlock);
+
+    // As an optimization, if all incoming values in the new PhiNode (which is a
+    // subset of the incoming values of the old PHI node) have the same value,
+    // eliminate the PHI Node.
+    if (HasUniqueIncomingValue) {
+      NewPN->replaceAllUsesWith(UniqueValue);
+      if (AA) AA->deleteValue(NewPN);
+      BEBlock->getInstList().erase(NewPN);
+    }
+  }
+
+  // Now that all of the PHI nodes have been inserted and adjusted, modify the
+  // backedge blocks to jump to the BEBlock instead of the header.
+  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+    TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+    for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
+      if (TI->getSuccessor(Op) == Header)
+        TI->setSuccessor(Op, BEBlock);
+  }
+
+  //===--- Update all analyses which we must preserve now -----------------===//
+
+  // Update Loop Information - we know that this block is now in the current
+  // loop and all parent loops.
+  L->addBasicBlockToLoop(BEBlock, *LI);
+
+  // Update dominator information
+  DT->splitBlock(BEBlock);
+  if (DominanceFrontier *DF = getAnalysisToUpdate<DominanceFrontier>())
+    DF->splitBlock(BEBlock);
+}
+
+
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
new file mode 100644
index 0000000..7ce2479
--- /dev/null
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -0,0 +1,176 @@
+//===- LowerAllocations.cpp - Reduce malloc & free insts to calls ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerAllocations transformation is a target-dependent transformation
+// because it depends on the size of data types and alignment constraints.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerallocs"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumLowered, "Number of allocations lowered");
+
+namespace {
+  /// LowerAllocations - Turn malloc and free instructions into %malloc and
+  /// %free calls.
+  ///
+  class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass {
+    Constant *MallocFunc;   // Functions in the module we are processing
+    Constant *FreeFunc;     // Initialized by doInitialization
+    bool LowerMallocArgToInteger;
+  public:
+    static char ID; // Pass ID, replacement for typeid
+    LowerAllocations(bool LowerToInt = false)
+      : BasicBlockPass((intptr_t)&ID), MallocFunc(0), FreeFunc(0), 
+        LowerMallocArgToInteger(LowerToInt) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetData>();
+      AU.setPreservesCFG();
+
+      // This is a cluster of orthogonal Transforms:
+      AU.addPreserved<UnifyFunctionExitNodes>();
+      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreservedID(LowerSelectID);
+      AU.addPreservedID(LowerSwitchID);
+      AU.addPreservedID(LowerInvokePassID);
+    }
+
+    /// doPassInitialization - For the lower allocations pass, this ensures that
+    /// a module contains a declaration for a malloc and a free function.
+    ///
+    bool doInitialization(Module &M);
+
+    virtual bool doInitialization(Function &F) {
+      return BasicBlockPass::doInitialization(F);
+    }
+
+    /// runOnBasicBlock - This method does the actual work of converting
+    /// instructions over, assuming that the pass has already been initialized.
+    ///
+    bool runOnBasicBlock(BasicBlock &BB);
+  };
+
+  char LowerAllocations::ID = 0;
+  RegisterPass<LowerAllocations>
+  X("lowerallocs", "Lower allocations from instructions to calls");
+}
+
+// Publicly exposed interface to pass...
+const PassInfo *llvm::LowerAllocationsID = X.getPassInfo();
+// createLowerAllocationsPass - Interface to this file...
+Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {
+  return new LowerAllocations(LowerMallocArgToInteger);
+}
+
+
+// doInitialization - For the lower allocations pass, this ensures that a
+// module contains a declaration for a malloc and a free function.
+//
+// This function is always successful.
+//
+bool LowerAllocations::doInitialization(Module &M) {
+  const Type *BPTy = PointerType::get(Type::Int8Ty);
+  // Prototype malloc as "char* malloc(...)", because we don't know in
+  // doInitialization whether size_t is int or long.
+  FunctionType *FT = FunctionType::get(BPTy, std::vector<const Type*>(), true);
+  MallocFunc = M.getOrInsertFunction("malloc", FT);
+  FreeFunc = M.getOrInsertFunction("free"  , Type::VoidTy, BPTy, (Type *)0);
+  return true;
+}
+
+// runOnBasicBlock - This method does the actual work of converting
+// instructions over, assuming that the pass has already been initialized.
+//
+bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
+  bool Changed = false;
+  assert(MallocFunc && FreeFunc && "Pass not initialized!");
+
+  BasicBlock::InstListType &BBIL = BB.getInstList();
+
+  const TargetData &TD = getAnalysis<TargetData>();
+  const Type *IntPtrTy = TD.getIntPtrType();
+
+  // Loop over all of the instructions, looking for malloc or free instructions
+  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
+    if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
+      const Type *AllocTy = MI->getType()->getElementType();
+
+      // malloc(type) becomes sbyte *malloc(size)
+      Value *MallocArg;
+      if (LowerMallocArgToInteger)
+        MallocArg = ConstantInt::get(Type::Int64Ty, TD.getTypeSize(AllocTy));
+      else
+        MallocArg = ConstantExpr::getSizeOf(AllocTy);
+      MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg), 
+                                                  IntPtrTy);
+
+      if (MI->isArrayAllocation()) {
+        if (isa<ConstantInt>(MallocArg) &&
+            cast<ConstantInt>(MallocArg)->isOne()) {
+          MallocArg = MI->getOperand(0);         // Operand * 1 = Operand
+        } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) {
+          CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/);
+          MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg));
+        } else {
+          Value *Scale = MI->getOperand(0);
+          if (Scale->getType() != IntPtrTy)
+            Scale = CastInst::createIntegerCast(Scale, IntPtrTy, false /*ZExt*/,
+                                                "", I);
+
+          // Multiply it by the array size if necessary...
+          MallocArg = BinaryOperator::create(Instruction::Mul, Scale,
+                                             MallocArg, "", I);
+        }
+      }
+
+      // Create the call to Malloc.
+      CallInst *MCall = new CallInst(MallocFunc, MallocArg, "", I);
+      MCall->setTailCall();
+
+      // Create a cast instruction to convert to the right type...
+      Value *MCast;
+      if (MCall->getType() != Type::VoidTy)
+        MCast = new BitCastInst(MCall, MI->getType(), "", I);
+      else
+        MCast = Constant::getNullValue(MI->getType());
+
+      // Replace all uses of the old malloc inst with the cast inst
+      MI->replaceAllUsesWith(MCast);
+      I = --BBIL.erase(I);         // remove and delete the malloc instr...
+      Changed = true;
+      ++NumLowered;
+    } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
+      Value *PtrCast = new BitCastInst(FI->getOperand(0),
+                                       PointerType::get(Type::Int8Ty), "", I);
+
+      // Insert a call to the free function...
+      (new CallInst(FreeFunc, PtrCast, "", I))->setTailCall();
+
+      // Delete the old free instruction
+      I = --BBIL.erase(I);
+      Changed = true;
+      ++NumLowered;
+    }
+  }
+
+  return Changed;
+}
+
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
new file mode 100644
index 0000000..d72c018
--- /dev/null
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -0,0 +1,585 @@
+//===- LowerInvoke.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding.  This pass supports two models of exception handling
+// lowering, the 'cheap' support and the 'expensive' support.
+//
+// 'Cheap' exception handling support gives the program the ability to execute
+// any program which does not "throw an exception", by turning 'invoke'
+// instructions into calls and by turning 'unwind' instructions into calls to
+// abort().  If the program does dynamically use the unwind instruction, the
+// program will print a message then abort.
+//
+// 'Expensive' exception handling support gives the full exception handling
+// support to the program at the cost of making the 'invoke' instruction
+// really expensive.  It basically inserts setjmp/longjmp calls to emulate the
+// exception handling as necessary.
+//
+// Because the 'expensive' support slows down programs a lot, and EH is only
+// used for a subset of the programs, it must be specifically enabled by an
+// option.
+//
+// Note that after this pass runs the CFG is not entirely accurate (exceptional
+// control flow edges are not correct anymore) so only very simple things should
+// be done after the lowerinvoke pass has run (like generation of native code).
+// This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't
+// support the invoke instruction yet" lowering pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lowerinvoke"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetLowering.h"
+#include <csetjmp>
+#include <set>
+using namespace llvm;
+
+// Pass statistics, reported with -stats.
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+// Selects the 'expensive' setjmp/longjmp-based lowering that actually
+// implements unwinding, instead of the default abort-on-unwind model.
+static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
+ cl::desc("Make the -lowerinvoke pass insert expensive, but correct, EH code"));
+
+namespace {
+  /// LowerInvoke - Function pass that replaces invoke/unwind instructions
+  /// with plain calls plus either abort() ('cheap' model) or a
+  /// setjmp/longjmp-based unwinding scheme ('expensive' model).
+  class VISIBILITY_HIDDEN LowerInvoke : public FunctionPass {
+    // Used for both models.
+    Constant *WriteFn;
+    Constant *AbortFn;
+    Value *AbortMessage;        // i8* GEP into the abort-message global.
+    unsigned AbortMessageLength; // Message length, excluding the trailing NUL.
+
+    // Used for expensive EH support.
+    const Type *JBLinkTy;       // { jmp_buf, next-ptr } linked-list node type.
+    GlobalVariable *JBListHead; // Head of the per-thread jmp_buf list.
+    Constant *SetJmpFn, *LongJmpFn;
+    
+    // We peek in TLI to grab the target's jmp_buf size and alignment
+    const TargetLowering *TLI;
+    
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    LowerInvoke(const TargetLowering *tli = NULL) : FunctionPass((intptr_t)&ID),
+      TLI(tli) { }
+    bool doInitialization(Module &M);
+    bool runOnFunction(Function &F);
+ 
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // This is a cluster of orthogonal Transforms
+      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreservedID(LowerSelectID);
+      AU.addPreservedID(LowerSwitchID);
+      AU.addPreservedID(LowerAllocationsID);
+    }
+       
+  private:
+    void createAbortMessage(Module *M);
+    void writeAbortMessage(Instruction *IB);
+    bool insertCheapEHSupport(Function &F);
+    void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes);
+    void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+                                AllocaInst *InvokeNum, SwitchInst *CatchSwitch);
+    bool insertExpensiveEHSupport(Function &F);
+  };
+
+  char LowerInvoke::ID = 0;
+  RegisterPass<LowerInvoke>
+  X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
+}
+
+// Pass ID other passes reference via addPreservedID/addRequiredID.
+const PassInfo *llvm::LowerInvokePassID = X.getPassInfo();
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { 
+  return new LowerInvoke(TLI); 
+}
+
+// doInitialization - Make sure that there is a prototype for abort in the
+// current module.  For the expensive model, also create the jmp_buf
+// linked-list node type, the list-head global, and the setjmp/longjmp
+// intrinsic declarations.
+bool LowerInvoke::doInitialization(Module &M) {
+  const Type *VoidPtrTy = PointerType::get(Type::Int8Ty);
+  AbortMessage = 0;
+  if (ExpensiveEHSupport) {
+    // Insert a type for the linked list of jump buffers.
+    // Use the target's jmp_buf size when a TargetLowering is available;
+    // otherwise fall back to a conservative 200-pointer buffer.
+    unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
+    JBSize = JBSize ? JBSize : 200;
+    const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+    { // The type is recursive, so use a type holder.
+      std::vector<const Type*> Elements;
+      Elements.push_back(JmpBufTy);
+      OpaqueType *OT = OpaqueType::get();
+      Elements.push_back(PointerType::get(OT));
+      PATypeHolder JBLType(StructType::get(Elements));
+      OT->refineAbstractTypeTo(JBLType.get());  // Complete the cycle.
+      JBLinkTy = JBLType.get();
+      M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
+    }
+
+    const Type *PtrJBList = PointerType::get(JBLinkTy);
+
+    // Now that we've done that, insert the jmpbuf list head global, unless it
+    // already exists.
+    if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) {
+      JBListHead = new GlobalVariable(PtrJBList, false,
+                                      GlobalValue::LinkOnceLinkage,
+                                      Constant::getNullValue(PtrJBList),
+                                      "llvm.sjljeh.jblist", &M);
+    }
+    SetJmpFn = M.getOrInsertFunction("llvm.setjmp", Type::Int32Ty,
+                                     PointerType::get(JmpBufTy), (Type *)0);
+    LongJmpFn = M.getOrInsertFunction("llvm.longjmp", Type::VoidTy,
+                                      PointerType::get(JmpBufTy),
+                                      Type::Int32Ty, (Type *)0);
+  }
+
+  // We need the 'write' and 'abort' functions for both models.
+  AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0);
+#if 0 // "write" is Unix-specific.. code is going away soon anyway.
+  WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,
+                                  VoidPtrTy, Type::Int32Ty, (Type *)0);
+#else
+  WriteFn = 0;
+#endif
+  return true;
+}
+
+// createAbortMessage - Lazily create the internal global holding the text
+// printed before abort(), and cache a constant i8* GEP to its first character
+// in AbortMessage.  AbortMessageLength excludes the trailing NUL.
+// NOTE(review): the two branches differ only in the message string; the
+// GlobalVariable/GEP tail is duplicated and could be factored out.
+void LowerInvoke::createAbortMessage(Module *M) {
+  if (ExpensiveEHSupport) {
+    // The abort message for expensive EH support tells the user that the
+    // program 'unwound' without an 'invoke' instruction.
+    Constant *Msg =
+      ConstantArray::get("ERROR: Exception thrown, but not caught!\n");
+    AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0
+
+    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+                                               GlobalValue::InternalLinkage,
+                                               Msg, "abortmsg", M);
+    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+    AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
+  } else {
+    // The abort message for cheap EH support tells the user that EH is not
+    // enabled.
+    Constant *Msg =
+      ConstantArray::get("Exception handler needed, but not enabled.  Recompile"
+                         " program with -enable-correct-eh-support.\n");
+    AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0
+
+    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true,
+                                               GlobalValue::InternalLinkage,
+                                               Msg, "abortmsg", M);
+    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty));
+    AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
+  }
+}
+
+
+// writeAbortMessage - Emit write(2, AbortMessage, AbortMessageLength) before
+// the instruction IB.  The entire body is compiled out (#if 0) because
+// write() is Unix-specific, so this function is currently a no-op.
+void LowerInvoke::writeAbortMessage(Instruction *IB) {
+#if 0
+  if (AbortMessage == 0)
+    createAbortMessage(IB->getParent()->getParent()->getParent());
+
+  // These are the arguments we WANT...
+  Value* Args[3];
+  Args[0] = ConstantInt::get(Type::Int32Ty, 2);
+  Args[1] = AbortMessage;
+  Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength);
+  (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall();
+#endif
+}
+
+// insertCheapEHSupport - 'Cheap' model: turn each invoke into a plain call
+// followed by an unconditional branch to the normal destination, and turn
+// each unwind into a message + abort().  Programs that never dynamically
+// unwind behave identically; ones that do will abort.
+bool LowerInvoke::insertCheapEHSupport(Function &F) {
+  bool Changed = false;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      // Operands 0-2 are the callee and the two destination blocks; the
+      // actual call arguments follow (presumed operand layout for this LLVM
+      // revision -- confirm against InvokeInst).
+      std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+      // Insert a normal call instruction...
+      CallInst *NewCall = new CallInst(II->getCalledValue(),
+                                       &CallArgs[0], CallArgs.size(), "", II);
+      NewCall->takeName(II);
+      NewCall->setCallingConv(II->getCallingConv());
+      II->replaceAllUsesWith(NewCall);
+
+      // Insert an unconditional branch to the normal destination.
+      new BranchInst(II->getNormalDest(), II);
+
+      // Remove any PHI node entries from the exception destination.
+      II->getUnwindDest()->removePredecessor(BB);
+
+      // Remove the invoke instruction now.
+      BB->getInstList().erase(II);
+
+      ++NumInvokes; Changed = true;
+    } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+      writeAbortMessage(UI);
+
+      // Insert a call to abort()
+      (new CallInst(AbortFn, "", UI))->setTailCall();
+
+      // Insert a return instruction.  This really should be a "barrier", as it
+      // is unreachable.
+      new ReturnInst(F.getReturnType() == Type::VoidTy ? 0 :
+                            Constant::getNullValue(F.getReturnType()), UI);
+
+      // Remove the unwind instruction now.
+      BB->getInstList().erase(UI);
+
+      ++NumUnwinds; Changed = true;
+    }
+  return Changed;
+}
+
+/// rewriteExpensiveInvoke - Insert code and hack the function to replace the
+/// specified invoke instruction with a call.  InvokeNum is this invoke's
+/// 1-based identifier: it is stored (volatile) into InvokeNum's alloca just
+/// before the call so the catch block's switch can dispatch to the right
+/// unwind destination, and reset to zero on the normal path afterward.
+void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
+                                         AllocaInst *InvokeNum,
+                                         SwitchInst *CatchSwitch) {
+  ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo);
+
+  // Insert a store of the invoke num before the invoke and store zero into the
+  // location afterward.
+  new StoreInst(InvokeNoC, InvokeNum, true, II);  // volatile
+  
+  // Skip over any PHI nodes so the reset-store lands after them.
+  BasicBlock::iterator NI = II->getNormalDest()->begin();
+  while (isa<PHINode>(NI)) ++NI;
+  // nonvolatile.
+  new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI);
+  
+  // Add a switch case to our unwind block.
+  CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
+  
+  // Insert a normal call instruction.
+  std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+  CallInst *NewCall = new CallInst(II->getCalledValue(),
+                                   &CallArgs[0], CallArgs.size(), "",
+                                   II);
+  NewCall->takeName(II);
+  NewCall->setCallingConv(II->getCallingConv());
+  II->replaceAllUsesWith(NewCall);
+  
+  // Replace the invoke with an uncond branch.
+  new BranchInst(II->getNormalDest(), NewCall->getParent());
+  II->eraseFromParent();
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen (recursive reverse-CFG flood fill).
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+  if (!LiveBBs.insert(BB).second) return; // already been here.
+  
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    MarkBlocksLiveIn(*PI, LiveBBs);  
+}
+
+// First thing we need to do is scan the whole function for values that are
+// live across unwind edges.  Each value that is live across an unwind edge
+// we spill into a stack location, guaranteeing that there is nothing live
+// across the unwind edge.  This process also splits all critical edges
+// coming out of invoke's.
+void LowerInvoke::
+splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
+  // First step, split all critical edges from invoke instructions.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+    InvokeInst *II = Invokes[i];
+    SplitCriticalEdge(II, 0, this);
+    SplitCriticalEdge(II, 1, this);
+    // NOTE(review): isa<PHINode> on a BasicBlock* is always false, so this
+    // assert is vacuous; it presumably meant to inspect the first instruction
+    // of each destination block -- confirm.
+    assert(!isa<PHINode>(II->getNormalDest()) &&
+           !isa<PHINode>(II->getUnwindDest()) &&
+           "critical edge splitting left single entry phi nodes?");
+  }
+
+  Function *F = Invokes.back()->getParent()->getParent();
+  
+  // To avoid having to handle incoming arguments specially, we lower each arg
+  // to a copy instruction in the entry block.  This ensures that the argument
+  // value itself cannot be live across the entry block.
+  BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+  while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+        isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+    ++AfterAllocaInsertPt;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+       AI != E; ++AI) {
+    // This is always a no-op cast because we're casting AI to AI->getType() so
+    // src and destination types are identical. BitCast is the only possibility.
+    CastInst *NC = new BitCastInst(
+      AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+    AI->replaceAllUsesWith(NC);
+    // Normally it is forbidden to replace a CastInst's operand because it
+    // could cause the opcode to reflect an illegal conversion. However, we're
+    // replacing it here with the same value it was constructed with to simply
+    // make NC its user.
+    NC->setOperand(0, AI); 
+  }
+  
+  // Finally, scan the code looking for instructions with bad live ranges.
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+      // Ignore obvious cases we don't have to handle.  In particular, most
+      // instructions either have no uses or only have a single use inside the
+      // current block.  Ignore them quickly.
+      Instruction *Inst = II;
+      if (Inst->use_empty()) continue;
+      if (Inst->hasOneUse() &&
+          cast<Instruction>(Inst->use_back())->getParent() == BB &&
+          !isa<PHINode>(Inst->use_back())) continue;
+      
+      // If this is an alloca in the entry block, it's not a real register
+      // value.
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+        if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+          continue;
+      
+      // Avoid iterator invalidation by copying users to a temporary vector.
+      std::vector<Instruction*> Users;
+      for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+           UI != E; ++UI) {
+        Instruction *User = cast<Instruction>(*UI);
+        if (User->getParent() != BB || isa<PHINode>(User))
+          Users.push_back(User);
+      }
+
+      // Scan all of the uses and see if the live range is live across an unwind
+      // edge.  If we find a use live across an invoke edge, create an alloca
+      // and spill the value.
+      // NOTE(review): this set is declared but never used below -- dead local?
+      std::set<InvokeInst*> InvokesWithStoreInserted;
+
+      // Find all of the blocks that this value is live in.
+      std::set<BasicBlock*> LiveBBs;
+      LiveBBs.insert(Inst->getParent());
+      while (!Users.empty()) {
+        Instruction *U = Users.back();
+        Users.pop_back();
+        
+        if (!isa<PHINode>(U)) {
+          MarkBlocksLiveIn(U->getParent(), LiveBBs);
+        } else {
+          // Uses for a PHI node occur in their predecessor block.
+          PHINode *PN = cast<PHINode>(U);
+          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+            if (PN->getIncomingValue(i) == Inst)
+              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+        }
+      }
+      
+      // Now that we know all of the blocks that this thing is live in, see if
+      // it includes any of the unwind locations.
+      bool NeedsSpill = false;
+      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+          NeedsSpill = true;
+        }
+      }
+
+      // If we decided we need a spill, do it.
+      if (NeedsSpill) {
+        ++NumSpilled;
+        DemoteRegToStack(*Inst, true);
+      }
+    }
+}
+
+// insertExpensiveEHSupport - 'Expensive' model: emulate unwinding with
+// setjmp/longjmp.  A per-function jmp_buf node is pushed onto the global
+// JBListHead list; setjmp in the entry dominates all invokes, and a catch
+// block switches on the stored invoke number to reach the right unwind
+// destination.  Unwinds become longjmps to the innermost recorded buffer.
+// Returns true if the function was changed.
+bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
+  std::vector<ReturnInst*> Returns;
+  std::vector<UnwindInst*> Unwinds;
+  std::vector<InvokeInst*> Invokes;
+
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+      // Remember all return instructions in case we insert an invoke into this
+      // function.
+      Returns.push_back(RI);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      Invokes.push_back(II);
+    } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      Unwinds.push_back(UI);
+    }
+
+  if (Unwinds.empty() && Invokes.empty()) return false;
+
+  NumInvokes += Invokes.size();
+  NumUnwinds += Unwinds.size();
+  
+  // TODO: This is not an optimal way to do this.  In particular, this always
+  // inserts setjmp calls into the entries of functions with invoke instructions
+  // even though there are possibly paths through the function that do not
+  // execute any invokes.  In particular, for functions with early exits, e.g.
+  // the 'addMove' method in hexxagon, it would be nice to not have to do the
+  // setjmp stuff on the early exit path.  This requires a bit of dataflow, but
+  // would not be too hard to do.
+
+  // If we have an invoke instruction, insert a setjmp that dominates all
+  // invokes.  After the setjmp, use a cond branch that goes to the original
+  // code path on zero, and to a designated 'catch' block of nonzero.
+  Value *OldJmpBufPtr = 0;
+  if (!Invokes.empty()) {
+    // First thing we need to do is scan the whole function for values that are
+    // live across unwind edges.  Each value that is live across an unwind edge
+    // we spill into a stack location, guaranteeing that there is nothing live
+    // across the unwind edge.  This process also splits all critical edges
+    // coming out of invoke's.
+    splitLiveRangesLiveAcrossInvokes(Invokes);    
+    
+    BasicBlock *EntryBB = F.begin();
+    
+    // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+    // that needs to be restored on all exits from the function.  This is an
+    // alloca because the value needs to be live across invokes.
+    unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
+    AllocaInst *JmpBuf = 
+      new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin());
+    
+    // GEP to field 1 of the node: the 'next' pointer slot, which holds the
+    // saved previous list head.
+    std::vector<Value*> Idx;
+    Idx.push_back(Constant::getNullValue(Type::Int32Ty));
+    Idx.push_back(ConstantInt::get(Type::Int32Ty, 1));
+    OldJmpBufPtr = new GetElementPtrInst(JmpBuf, &Idx[0], 2, "OldBuf",
+                                         EntryBB->getTerminator());
+
+    // Copy the JBListHead to the alloca.
+    Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
+                                 EntryBB->getTerminator());
+    new StoreInst(OldBuf, OldJmpBufPtr, true, EntryBB->getTerminator());
+    
+    // Add the new jumpbuf to the list.
+    new StoreInst(JmpBuf, JBListHead, true, EntryBB->getTerminator());
+
+    // Create the catch block.  The catch block is basically a big switch
+    // statement that goes to all of the invoke catch blocks.
+    BasicBlock *CatchBB = new BasicBlock("setjmp.catch", &F);
+    
+    // Create an alloca which keeps track of which invoke is currently
+    // executing.  For normal calls it contains zero.
+    AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum",
+                                           EntryBB->begin());
+    new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true,
+                  EntryBB->getTerminator());
+    
+    // Insert a load in the Catch block, and a switch on its value.  By default,
+    // we go to a block that just does an unwind (which is the correct action
+    // for a standard call).
+    BasicBlock *UnwindBB = new BasicBlock("unwindbb", &F);
+    Unwinds.push_back(new UnwindInst(UnwindBB));
+    
+    Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);
+    SwitchInst *CatchSwitch = 
+      new SwitchInst(CatchLoad, UnwindBB, Invokes.size(), CatchBB);
+
+    // Now that things are set up, insert the setjmp call itself.
+    
+    // Split the entry block to insert the conditional branch for the setjmp.
+    BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+                                                     "setjmp.cont");
+
+    // Reuse Idx with field index 0: the jmp_buf itself.
+    Idx[1] = ConstantInt::get(Type::Int32Ty, 0);
+    Value *JmpBufPtr = new GetElementPtrInst(JmpBuf, &Idx[0], Idx.size(),
+                                             "TheJmpBuf",
+                                             EntryBB->getTerminator());
+    Value *SJRet = new CallInst(SetJmpFn, JmpBufPtr, "sjret",
+                                EntryBB->getTerminator());
+    
+    // Compare the return value to zero.
+    Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet, 
+                                   Constant::getNullValue(SJRet->getType()),
+      "notunwind", EntryBB->getTerminator());
+    // Nuke the uncond branch.
+    EntryBB->getTerminator()->eraseFromParent();
+    
+    // Put in a new condbranch in its place.
+    new BranchInst(ContBlock, CatchBB, IsNormal, EntryBB);
+
+    // At this point, we are all set up, rewrite each invoke instruction.
+    // Invoke numbers are 1-based; zero means "no invoke executing".
+    for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+      rewriteExpensiveInvoke(Invokes[i], i+1, InvokeNum, CatchSwitch);
+  }
+
+  // We know that there is at least one unwind.
+  
+  // Create three new blocks, the block to load the jmpbuf ptr and compare
+  // against null, the block to do the longjmp, and the error block for if it
+  // is null.  Add them at the end of the function because they are not hot.
+  BasicBlock *UnwindHandler = new BasicBlock("dounwind", &F);
+  BasicBlock *UnwindBlock = new BasicBlock("unwind", &F);
+  BasicBlock *TermBlock = new BasicBlock("unwinderror", &F);
+
+  // If this function contains an invoke, restore the old jumpbuf ptr.
+  Value *BufPtr;
+  if (OldJmpBufPtr) {
+    // Before the return, insert a copy from the saved value to the new value.
+    BufPtr = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", UnwindHandler);
+    new StoreInst(BufPtr, JBListHead, UnwindHandler);
+  } else {
+    BufPtr = new LoadInst(JBListHead, "ehlist", UnwindHandler);
+  }
+  
+  // Load the JBList, if it's null, then there was no catch!
+  Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr, 
+                                Constant::getNullValue(BufPtr->getType()),
+    "notnull", UnwindHandler);
+  new BranchInst(UnwindBlock, TermBlock, NotNull, UnwindHandler);
+  
+  // Create the block to do the longjmp.
+  // Get a pointer to the jmpbuf and longjmp.
+  // Idx is reused: first as GEP indices, then overwritten in place to hold
+  // the two longjmp call arguments (buffer, value 1).
+  std::vector<Value*> Idx;
+  Idx.push_back(Constant::getNullValue(Type::Int32Ty));
+  Idx.push_back(ConstantInt::get(Type::Int32Ty, 0));
+  Idx[0] = new GetElementPtrInst(BufPtr, &Idx[0], 2, "JmpBuf", UnwindBlock);
+  Idx[1] = ConstantInt::get(Type::Int32Ty, 1);
+  new CallInst(LongJmpFn, &Idx[0], Idx.size(), "", UnwindBlock);
+  new UnreachableInst(UnwindBlock);
+  
+  // Set up the term block ("throw without a catch").
+  new UnreachableInst(TermBlock);
+
+  // Insert a new call to write(2, AbortMessage, AbortMessageLength);
+  writeAbortMessage(TermBlock->getTerminator());
+  
+  // Insert a call to abort()
+  (new CallInst(AbortFn, "",
+                TermBlock->getTerminator()))->setTailCall();
+    
+  
+  // Replace all unwinds with a branch to the unwind handler.
+  for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+    new BranchInst(UnwindHandler, Unwinds[i]);
+    Unwinds[i]->eraseFromParent();    
+  } 
+  
+  // Finally, for any returns from this function, if this function contains an
+  // invoke, restore the old jmpbuf pointer to its input value.
+  if (OldJmpBufPtr) {
+    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
+      ReturnInst *R = Returns[i];
+      
+      // Before the return, insert a copy from the saved value to the new value.
+      Value *OldBuf = new LoadInst(OldJmpBufPtr, "oldjmpbufptr", true, R);
+      new StoreInst(OldBuf, JBListHead, true, R);
+    }
+  }
+  
+  return true;
+}
+
+// runOnFunction - Dispatch to the lowering model selected by the
+// -enable-correct-eh-support flag.
+bool LowerInvoke::runOnFunction(Function &F) {
+  if (ExpensiveEHSupport)
+    return insertExpensiveEHSupport(F);
+  else
+    return insertCheapEHSupport(F);
+}
diff --git a/lib/Transforms/Utils/LowerSelect.cpp b/lib/Transforms/Utils/LowerSelect.cpp
new file mode 100644
index 0000000..1882695
--- /dev/null
+++ b/lib/Transforms/Utils/LowerSelect.cpp
@@ -0,0 +1,105 @@
+//===- LowerSelect.cpp - Transform select insts to branches ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers select instructions into conditional branches for targets
+// that do not have conditional moves or that have not implemented the select
+// instruction yet.
+//
+// Note that this pass could be improved.  In particular it turns every select
+// instruction into a new conditional branch, even though some common cases have
+// select instructions on the same predicate next to each other.  It would be
+// better to use the same branch for the whole group of selects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+namespace {
+  /// LowerSelect - Turn select instructions into conditional branches.
+  ///
+  class VISIBILITY_HIDDEN LowerSelect : public FunctionPass {
+    bool OnlyFP;   // Only lower FP select instructions?
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    LowerSelect(bool onlyfp = false) : FunctionPass((intptr_t)&ID), 
+      OnlyFP(onlyfp) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // This certainly destroys the CFG.
+      // This is a cluster of orthogonal Transforms:
+      AU.addPreserved<UnifyFunctionExitNodes>();
+      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreservedID(LowerSwitchID);
+      AU.addPreservedID(LowerInvokePassID);
+      AU.addPreservedID(LowerAllocationsID);
+    }
+
+    bool runOnFunction(Function &F);
+  };
+
+  char LowerSelect::ID = 0;
+  RegisterPass<LowerSelect>
+  X("lowerselect", "Lower select instructions to branches");
+}
+
+// Publicly exposed interface to pass...
+const PassInfo *llvm::LowerSelectID = X.getPassInfo();
+//===----------------------------------------------------------------------===//
+// This pass converts SelectInst instructions into conditional branch and PHI
+// instructions.  If the OnlyFP flag is set to true, then only floating point
+// select instructions are lowered.
+//
+FunctionPass *llvm::createLowerSelectPass(bool OnlyFP) {
+  return new LowerSelect(OnlyFP);
+}
+
+
+// runOnFunction - Lower each select (or, with OnlyFP, each floating-point
+// select) by splitting its block and replacing the select with a conditional
+// branch plus a two-entry PHI in the continuation block.
+bool LowerSelect::runOnFunction(Function &F) {
+  bool Changed = false;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      if (SelectInst *SI = dyn_cast<SelectInst>(I))
+        if (!OnlyFP || SI->getType()->isFloatingPoint()) {
+          // Split this basic block in half right before the select instruction.
+          BasicBlock *NewCont =
+            BB->splitBasicBlock(I, BB->getName()+".selectcont");
+
+          // Make the true block, and make it branch to the continue block.
+          BasicBlock *NewTrue = new BasicBlock(BB->getName()+".selecttrue",
+                                               BB->getParent(), NewCont);
+          new BranchInst(NewCont, NewTrue);
+
+          // Make the unconditional branch in the incoming block be a
+          // conditional branch on the select predicate.
+          BB->getInstList().erase(BB->getTerminator());
+          new BranchInst(NewTrue, NewCont, SI->getCondition(), BB);
+
+          // Create a new PHI node in the cont block with the entries we need.
+          PHINode *PN = new PHINode(SI->getType(), "", NewCont->begin());
+          PN->takeName(SI);
+          PN->addIncoming(SI->getTrueValue(), NewTrue);
+          PN->addIncoming(SI->getFalseValue(), BB);
+
+          // Use the PHI instead of the select.
+          SI->replaceAllUsesWith(PN);
+          NewCont->getInstList().erase(SI);
+
+          Changed = true;
+          // The split invalidated our iterators; the outer loop will visit
+          // NewCont (inserted right after BB) and pick up any later selects.
+          break; // This block is done with.
+        }
+    }
+  return Changed;
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
new file mode 100644
index 0000000..633633d
--- /dev/null
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -0,0 +1,324 @@
+//===- LowerSwitch.cpp - Eliminate Switch instructions --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LowerSwitch transformation rewrites switch statements with a sequence of
+// branches, which allows targets to get away with not implementing the switch
+// statement until it is convenient.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+namespace {
+  /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
+  /// instructions.  Note that this cannot be a BasicBlock pass because it
+  /// modifies the CFG!
+  class VISIBILITY_HIDDEN LowerSwitch : public FunctionPass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    LowerSwitch() : FunctionPass((intptr_t) &ID) {} 
+
+    virtual bool runOnFunction(Function &F);
+    
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      // This is a cluster of orthogonal Transforms
+      AU.addPreserved<UnifyFunctionExitNodes>();
+      AU.addPreservedID(PromoteMemoryToRegisterID);
+      AU.addPreservedID(LowerSelectID);
+      AU.addPreservedID(LowerInvokePassID);
+      AU.addPreservedID(LowerAllocationsID);
+    }
+
+    /// CaseRange - A contiguous run of case values [Low, High] that all
+    /// branch to the same destination block BB.  A single-value case has
+    /// Low == High.
+    struct CaseRange {
+      Constant* Low;
+      Constant* High;
+      BasicBlock* BB;
+
+      CaseRange() : Low(0), High(0), BB(0) { }
+      CaseRange(Constant* low, Constant* high, BasicBlock* bb) :
+        Low(low), High(high), BB(bb) { }
+    };
+
+    typedef std::vector<CaseRange>           CaseVector;
+    typedef std::vector<CaseRange>::iterator CaseItr;
+  private:
+    void processSwitchInst(SwitchInst *SI);
+
+    BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
+                              BasicBlock* OrigBlock, BasicBlock* Default);
+    BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
+                             BasicBlock* OrigBlock, BasicBlock* Default);
+    unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+  };
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator () (const LowerSwitch::CaseRange& C1,
+                      const LowerSwitch::CaseRange& C2) {
+
+      // Because ranges are disjoint, comparing C1's low bound against C2's
+      // high bound is enough to order two distinct ranges.
+      const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  char LowerSwitch::ID = 0;
+  RegisterPass<LowerSwitch>
+  X("lowerswitch", "Lower SwitchInst's to branches");
+}
+
+// Publicly exposed interface to pass...
+const PassInfo *llvm::LowerSwitchID = X.getPassInfo();
+// createLowerSwitchPass - Interface to this file...
+FunctionPass *llvm::createLowerSwitchPass() {
+  return new LowerSwitch();
+}
+
+// runOnFunction - Rewrite every switch terminator in the function into a
+// balanced tree of conditional branches (see processSwitchInst).
+bool LowerSwitch::runOnFunction(Function &F) {
+  bool Changed = false;
+
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
+    BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+
+    if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+      Changed = true;
+      processSwitchInst(SI);
+    }
+  }
+
+  return Changed;
+}
+
+// operator<< - Used for debugging purposes.  Prints a case vector in the
+// form "[lo -hi, lo -hi, ...]".
+//
+static std::ostream& operator<<(std::ostream &O,
+                                const LowerSwitch::CaseVector &C) {
+  O << "[";
+
+  for (LowerSwitch::CaseVector::const_iterator B = C.begin(),
+         E = C.end(); B != E; ) {
+    O << *B->Low << " -" << *B->High;
+    if (++B != E) O << ", ";
+  }
+
+  return O << "]";
+}
+
+// OStream overload used by the DOUT debug macro; forwards to the
+// std::ostream printer above when the underlying stream is non-null.
+static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) {
+  if (O.stream()) *O.stream() << C;
+  return O;
+}
+
+// switchConvert - Convert the switch statement into a binary lookup of
+// the case values. The function recursively builds this tree.
+//
+// [Begin, End) must be a sorted, non-empty range of disjoint case clusters.
+// Returns the root block of the comparison tree built for that range.
+//
+BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+                                       Value* Val, BasicBlock* OrigBlock,
+                                       BasicBlock* Default)
+{
+  unsigned Size = End - Begin;
+
+  // A single cluster needs no further splitting: emit the leaf compare.
+  if (Size == 1)
+    return newLeafBlock(*Begin, Val, OrigBlock, Default);
+
+  // Split the range at the midpoint and recurse on each half.
+  unsigned Mid = Size / 2;
+  std::vector<CaseRange> LHS(Begin, Begin + Mid);
+  DOUT << "LHS: " << LHS << "\n";
+  std::vector<CaseRange> RHS(Begin + Mid, End);
+  DOUT << "RHS: " << RHS << "\n";
+
+  CaseRange& Pivot = *(Begin + Mid);
+  DEBUG( DOUT << "Pivot ==> " 
+              << cast<ConstantInt>(Pivot.Low)->getValue().toStringSigned(10)
+              << " -"
+              << cast<ConstantInt>(Pivot.High)->getValue().toStringSigned(10)
+              << "\n");
+
+  BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
+                                      OrigBlock, Default);
+  BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
+                                      OrigBlock, Default);
+
+  // Create a new node that checks if the value is < pivot. Go to the
+  // left branch if it is and right branch if not.
+  Function* F = OrigBlock->getParent();
+  BasicBlock* NewNode = new BasicBlock("NodeBlock");
+  // Insert the new node immediately after OrigBlock in the function.
+  Function::iterator FI = OrigBlock;
+  F->getBasicBlockList().insert(++FI, NewNode);
+
+  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot");
+  NewNode->getInstList().push_back(Comp);
+  new BranchInst(LBranch, RBranch, Comp, NewNode);
+  return NewNode;
+}
+
+// newLeafBlock - Create a new leaf block for the binary lookup tree. It
+// checks if the switch's value == the case's value. If not, then it
+// jumps to the default branch. At this point in the tree, the value
+// can't be another valid case value, so the jump to the "default" branch
+// is warranted.
+//
+BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
+                                      BasicBlock* OrigBlock,
+                                      BasicBlock* Default)
+{
+  Function* F = OrigBlock->getParent();
+  BasicBlock* NewLeaf = new BasicBlock("LeafBlock");
+  // Insert the leaf immediately after OrigBlock in the function.
+  Function::iterator FI = OrigBlock;
+  F->getBasicBlockList().insert(++FI, NewLeaf);
+
+  // Emit comparison
+  ICmpInst* Comp = NULL;
+  if (Leaf.Low == Leaf.High) {
+    // Make the seteq instruction...
+    Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low,
+                        "SwitchLeaf", NewLeaf);
+  } else {
+    // Make range comparison.  Three strategies, cheapest applicable first:
+    if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {
+      // Val >= Min && Val <= Hi --> Val <= Hi
+      Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High,
+                          "SwitchLeaf", NewLeaf);
+    } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {
+      // Val >= 0 && Val <= Hi --> Val <=u Hi
+      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High,
+                          "SwitchLeaf", NewLeaf);      
+    } else {
+      // Emit V-Lo <=u Hi-Lo
+      Constant* NegLo = ConstantExpr::getNeg(Leaf.Low);
+      Instruction* Add = BinaryOperator::createAdd(Val, NegLo,
+                                                   Val->getName()+".off",
+                                                   NewLeaf);
+      Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High);
+      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound,
+                          "SwitchLeaf", NewLeaf);
+    }
+  }
+
+  // Make the conditional branch...
+  BasicBlock* Succ = Leaf.BB;
+  new BranchInst(Succ, Default, Comp, NewLeaf);
+
+  // If there were any PHI nodes in this successor, rewrite one entry
+  // from OrigBlock to come from NewLeaf.
+  for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+    PHINode* PN = cast<PHINode>(I);
+    // Remove all but one incoming entries from the cluster.  A cluster
+    // covering High-Low+1 values contributed that many entries for
+    // OrigBlock; delete Range = High-Low of them, keeping a single entry
+    // to retarget at NewLeaf below.
+    uint64_t Range = cast<ConstantInt>(Leaf.High)->getSExtValue() -
+                     cast<ConstantInt>(Leaf.Low)->getSExtValue();    
+    for (uint64_t j = 0; j < Range; ++j) {
+      PN->removeIncomingValue(OrigBlock);
+    }
+    
+    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+    PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf);
+  }
+
+  return NewLeaf;
+}
+
+// Clusterify - Transform simple list of Cases into list of CaseRange's.
+// Adjacent case values with the same destination are merged into a single
+// range.  Returns the number of comparisons the lowered form will need
+// (a range counts as two compares, a single value as one).
+unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
+  unsigned numCmps = 0;
+
+  // Start with "simple" cases.  Successor 0 is the default destination,
+  // so the explicit case values begin at index 1.
+  for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
+    Cases.push_back(CaseRange(SI->getSuccessorValue(i),
+                              SI->getSuccessorValue(i),
+                              SI->getSuccessor(i)));
+  sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge case into clusters
+  if (Cases.size()>=2)
+    for (CaseItr I=Cases.begin(), J=++(Cases.begin()), E=Cases.end(); J!=E; ) {
+      int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
+      int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
+      BasicBlock* nextBB = J->BB;
+      BasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue-currentValue==1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  // Count compares: ++numCmps in the loop header counts every cluster once;
+  // the body adds one more for each multi-value range.
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+// processSwitchInst - Replace the specified switch instruction with a sequence
+// of chained if-then insts in a balanced binary search.
+//
+void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+  BasicBlock *CurBlock = SI->getParent();
+  BasicBlock *OrigBlock = CurBlock;
+  Function *F = CurBlock->getParent();
+  Value *Val = SI->getOperand(0);  // The value we are switching on...
+  BasicBlock* Default = SI->getDefaultDest();
+
+  // If there is only the default destination, don't bother with the code
+  // below; two operands means just the condition value and the default dest.
+  if (SI->getNumOperands() == 2) {
+    new BranchInst(SI->getDefaultDest(), CurBlock);
+    CurBlock->getInstList().erase(SI);
+    return;
+  }
+
+  // Create a new, empty default block so that the new hierarchy of
+  // if-then statements go to this and the PHI nodes are happy.
+  BasicBlock* NewDefault = new BasicBlock("NewDefault");
+  F->getBasicBlockList().insert(Default, NewDefault);
+
+  new BranchInst(Default, NewDefault);
+
+  // If there is an entry in any PHI nodes for the default edge, make sure
+  // to update them as well.
+  for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    int BlockIdx = PN->getBasicBlockIndex(OrigBlock);
+    assert(BlockIdx != -1 && "Switch didn't go to this successor??");
+    PN->setIncomingBlock((unsigned)BlockIdx, NewDefault);
+  }
+
+  // Prepare cases vector.
+  CaseVector Cases;
+  unsigned numCmps = Clusterify(Cases, SI);
+
+  DOUT << "Clusterify finished. Total clusters: " << Cases.size()
+       << ". Total compares: " << numCmps << "\n";
+  DOUT << "Cases: " << Cases << "\n";
+  
+  // Build the balanced comparison tree over the clustered cases.
+  BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
+                                          OrigBlock, NewDefault);
+
+  // Branch to our shiny new if-then stuff...
+  new BranchInst(SwitchBlock, OrigBlock);
+
+  // We are now done with the switch instruction, delete it.
+  CurBlock->getInstList().erase(SI);
+}
diff --git a/lib/Transforms/Utils/Makefile b/lib/Transforms/Utils/Makefile
new file mode 100644
index 0000000..26fc426
--- /dev/null
+++ b/lib/Transforms/Utils/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Utils/Makefile -----------------------*- Makefile -*-===##
+# 
+#                     The LLVM Compiler Infrastructure
+#
+# This file was developed by the LLVM research group and is distributed under
+# the University of Illinois Open Source License. See LICENSE.TXT for details.
+# 
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMTransformUtils
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
new file mode 100644
index 0000000..d67b3de
--- /dev/null
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -0,0 +1,93 @@
+//===- Mem2Reg.cpp - The -mem2reg pass, a wrapper around the Utils lib ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is a simple pass wrapper around the PromoteMemToReg function call
+// exposed by the Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mem2reg"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+using namespace llvm;
+
+STATISTIC(NumPromoted, "Number of alloca's promoted");
+
+namespace {
+  /// PromotePass - The -mem2reg pass: a thin FunctionPass wrapper that finds
+  /// promotable allocas in the entry block and hands them to PromoteMemToReg.
+  struct VISIBILITY_HIDDEN PromotePass : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    PromotePass() : FunctionPass((intptr_t)&ID) {}
+
+    // runOnFunction - To run this pass, first we calculate the alloca
+    // instructions that are safe for promotion, then we promote each one.
+    //
+    virtual bool runOnFunction(Function &F);
+
+    // getAnalysisUsage - We need dominance frontiers
+    //
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DominatorTree>();
+      AU.addRequired<DominanceFrontier>();
+      AU.setPreservesCFG();
+      // This is a cluster of orthogonal Transforms
+      AU.addPreserved<UnifyFunctionExitNodes>();
+      AU.addPreservedID(LowerSelectID);
+      AU.addPreservedID(LowerSwitchID);
+      AU.addPreservedID(LowerInvokePassID);
+      AU.addPreservedID(LowerAllocationsID);
+    }
+  };
+
+  char PromotePass::ID = 0;
+  RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register");
+}  // end of anonymous namespace
+
+// runOnFunction - Repeatedly collect promotable entry-block allocas and
+// promote them to SSA registers, iterating to a fixpoint since promotion
+// may make further allocas promotable.
+bool PromotePass::runOnFunction(Function &F) {
+  std::vector<AllocaInst*> Allocas;
+
+  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
+
+  bool Changed  = false;
+
+  DominatorTree &DT = getAnalysis<DominatorTree>();
+  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
+
+  while (1) {
+    Allocas.clear();
+
+    // Find allocas that are safe to promote, by looking at all instructions in
+    // the entry node.  (--BB.end() stops the scan just before the terminator.)
+    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
+        if (isAllocaPromotable(AI))
+          Allocas.push_back(AI);
+
+    if (Allocas.empty()) break;
+
+    PromoteMemToReg(Allocas, DT, DF);
+    NumPromoted += Allocas.size();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+// Publicly exposed interface to pass...
+const PassInfo *llvm::PromoteMemoryToRegisterID = X.getPassInfo();
+// createPromoteMemoryToRegister - Provide an entry point to create this pass.
+//
+FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
+  return new PromotePass();
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
new file mode 100644
index 0000000..259a5a2
--- /dev/null
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -0,0 +1,835 @@
+//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file promote memory references to be register references.  It promotes
+// alloca instructions which only have loads and stores as uses.  An alloca is
+// transformed by using dominator frontiers to place PHI nodes, then traversing
+// the function in depth-first order to rewrite loads and stores as appropriate.
+// This is just the standard SSA construction algorithm to construct "pruned"
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Compiler.h"
+#include <algorithm>
+using namespace llvm;
+
+// Provide DenseMapKeyInfo for (BasicBlock*, unsigned) pairs so they can be
+// used as DenseMap keys.  The empty and tombstone sentinels use pointer
+// values -1 and -2, which can never be real BasicBlock addresses.
+namespace llvm {
+template<>
+struct DenseMapKeyInfo<std::pair<BasicBlock*, unsigned> > {
+  static inline std::pair<BasicBlock*, unsigned> getEmptyKey() {
+    return std::make_pair((BasicBlock*)-1, ~0U);
+  }
+  static inline std::pair<BasicBlock*, unsigned> getTombstoneKey() {
+    return std::make_pair((BasicBlock*)-2, 0U);
+  }
+  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
+    // Combine the pointer hash with the index.
+    return DenseMapKeyInfo<void*>::getHashValue(Val.first) + Val.second*2;
+  }
+  static bool isPod() { return true; }
+};
+}
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca, and the
+/// alloca's address itself is never stored anywhere.
+///
+bool llvm::isAllocaPromotable(const AllocaInst *AI) {
+  // FIXME: If the memory unit is of pointer or integer type, we can permit
+  // assignments to subsections of the memory unit.
+
+  // Only allow direct loads and stores...
+  for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
+       UI != UE; ++UI)     // Loop over all of the uses of the alloca
+    if (isa<LoadInst>(*UI)) {
+      // noop
+    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+      if (SI->getOperand(0) == AI)
+        return false;   // Don't allow a store OF the AI, only INTO the AI.
+    } else {
+      return false;   // Not a load or store.
+    }
+
+  return true;
+}
+
+namespace {
+
+  // Data package used by RenamePass(): the block to rename, the predecessor
+  // it was reached from, and the current incoming value for each alloca.
+  class VISIBILITY_HIDDEN RenamePassData {
+  public:
+    RenamePassData(BasicBlock *B, BasicBlock *P,
+                   const std::vector<Value *> &V) : BB(B), Pred(P), Values(V) {}
+    BasicBlock *BB;
+    BasicBlock *Pred;
+    std::vector<Value *> Values;
+  };
+
+  /// PromoteMem2Reg - The worker that performs the actual promotion; its
+  /// state lives for one call to PromoteMemToReg.
+  struct VISIBILITY_HIDDEN PromoteMem2Reg {
+    /// Allocas - The alloca instructions being promoted.
+    ///
+    std::vector<AllocaInst*> Allocas;
+    // RetryList - Allocas the quick single-block promotion could not handle;
+    // the caller should retry them.
+    SmallVector<AllocaInst*, 16> &RetryList;
+    DominatorTree &DT;
+    DominanceFrontier &DF;
+
+    /// AST - An AliasSetTracker object to update.  If null, don't update it.
+    ///
+    AliasSetTracker *AST;
+
+    /// AllocaLookup - Reverse mapping of Allocas.
+    ///
+    std::map<AllocaInst*, unsigned>  AllocaLookup;
+
+    /// NewPhiNodes - The PhiNodes we're adding.
+    ///
+    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
+    
+    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
+    /// it corresponds to.
+    DenseMap<PHINode*, unsigned> PhiToAllocaMap;
+    
+    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
+    /// each alloca that is of pointer type, we keep track of what to copyValue
+    /// to the inserted PHI nodes here.
+    ///
+    std::vector<Value*> PointerAllocaValues;
+
+    /// Visited - The set of basic blocks the renamer has already visited.
+    ///
+    SmallPtrSet<BasicBlock*, 16> Visited;
+
+    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
+    /// non-deterministic behavior.
+    DenseMap<BasicBlock*, unsigned> BBNumbers;
+
+    /// RenamePassWorkList - Worklist used by RenamePass()
+    std::vector<RenamePassData> RenamePassWorkList;
+
+  public:
+    PromoteMem2Reg(const std::vector<AllocaInst*> &A,
+                   SmallVector<AllocaInst*, 16> &Retry, DominatorTree &dt,
+                   DominanceFrontier &df, AliasSetTracker *ast)
+      : Allocas(A), RetryList(Retry), DT(dt), DF(df), AST(ast) {}
+
+    void run();
+
+    /// properlyDominates - Return true if I1 properly dominates I2.
+    /// For an invoke, dominance is measured from its normal destination,
+    /// since that is where its result becomes available.
+    ///
+    bool properlyDominates(Instruction *I1, Instruction *I2) const {
+      if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
+        I1 = II->getNormalDest()->begin();
+      return DT.properlyDominates(I1->getParent(), I2->getParent());
+    }
+    
+    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
+    ///
+    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
+      return DT.dominates(BB1, BB2);
+    }
+
+  private:
+    void MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum,
+                               SmallPtrSet<PHINode*, 16> &DeadPHINodes);
+    bool PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI);
+    void PromoteLocallyUsedAllocas(BasicBlock *BB,
+                                   const std::vector<AllocaInst*> &AIs);
+
+    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
+                    std::vector<Value*> &IncVals);
+    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version,
+                      SmallPtrSet<PHINode*, 16> &InsertedPHINodes);
+  };
+
+}  // end of anonymous namespace
+
+void PromoteMem2Reg::run() {
+  Function &F = *DF.getRoot()->getParent();
+
+  // LocallyUsedAllocas - Keep track of all of the alloca instructions which are
+  // only used in a single basic block.  These instructions can be efficiently
+  // promoted by performing a single linear scan over that one block.  Since
+  // individual basic blocks are sometimes large, we group together all allocas
+  // that are live in a single basic block by the basic block they are live in.
+  std::map<BasicBlock*, std::vector<AllocaInst*> > LocallyUsedAllocas;
+
+  if (AST) PointerAllocaValues.resize(Allocas.size());
+
+  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
+    AllocaInst *AI = Allocas[AllocaNum];
+
+    assert(isAllocaPromotable(AI) &&
+           "Cannot promote non-promotable alloca!");
+    assert(AI->getParent()->getParent() == &F &&
+           "All allocas should be in the same function, which is same as DF!");
+
+    if (AI->use_empty()) {
+      // If there are no uses of the alloca, just delete it now.
+      if (AST) AST->deleteValue(AI);
+      AI->eraseFromParent();
+
+      // Remove the alloca from the Allocas list, since it has been processed
+      Allocas[AllocaNum] = Allocas.back();
+      Allocas.pop_back();
+      --AllocaNum;
+      continue;
+    }
+
+    // Calculate the set of read and write-locations for each alloca.  This is
+    // analogous to finding the 'uses' and 'definitions' of each variable.
+    std::vector<BasicBlock*> DefiningBlocks;
+    std::vector<BasicBlock*> UsingBlocks;
+
+    StoreInst  *OnlyStore = 0;
+    BasicBlock *OnlyBlock = 0;
+    bool OnlyUsedInOneBlock = true;
+
+    // As we scan the uses of the alloca instruction, keep track of stores, and
+    // decide whether all of the loads and stores to the alloca are within the
+    // same basic block.
+    Value *AllocaPointerVal = 0;
+    for (Value::use_iterator U =AI->use_begin(), E = AI->use_end(); U != E;++U){
+      Instruction *User = cast<Instruction>(*U);
+      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
+        // Remember the basic blocks which define new values for the alloca
+        DefiningBlocks.push_back(SI->getParent());
+        AllocaPointerVal = SI->getOperand(0);
+        OnlyStore = SI;
+      } else {
+        LoadInst *LI = cast<LoadInst>(User);
+        // Otherwise it must be a load instruction, keep track of variable reads
+        UsingBlocks.push_back(LI->getParent());
+        AllocaPointerVal = LI;
+      }
+
+      if (OnlyUsedInOneBlock) {
+        if (OnlyBlock == 0)
+          OnlyBlock = User->getParent();
+        else if (OnlyBlock != User->getParent())
+          OnlyUsedInOneBlock = false;
+      }
+    }
+
+    // If the alloca is only read and written in one basic block, just perform a
+    // linear sweep over the block to eliminate it.
+    if (OnlyUsedInOneBlock) {
+      LocallyUsedAllocas[OnlyBlock].push_back(AI);
+
+      // Remove the alloca from the Allocas list, since it will be processed.
+      Allocas[AllocaNum] = Allocas.back();
+      Allocas.pop_back();
+      --AllocaNum;
+      continue;
+    }
+
+    // If there is only a single store to this value, replace any loads of
+    // it that are directly dominated by the definition with the value stored.
+    if (DefiningBlocks.size() == 1) {
+      // Be aware of loads before the store.
+      std::set<BasicBlock*> ProcessedBlocks;
+      for (unsigned i = 0, e = UsingBlocks.size(); i != e; ++i)
+        // If the store dominates the block and if we haven't processed it yet,
+        // do so now.
+        if (dominates(OnlyStore->getParent(), UsingBlocks[i]))
+          if (ProcessedBlocks.insert(UsingBlocks[i]).second) {
+            BasicBlock *UseBlock = UsingBlocks[i];
+            
+            // If the use and store are in the same block, do a quick scan to
+            // verify that there are no uses before the store.
+            if (UseBlock == OnlyStore->getParent()) {
+              BasicBlock::iterator I = UseBlock->begin();
+              for (; &*I != OnlyStore; ++I) { // scan block for store.
+                if (isa<LoadInst>(I) && I->getOperand(0) == AI)
+                  break;
+              }
+              if (&*I != OnlyStore) break;  // Do not handle this case.
+            }
+        
+            // Otherwise, if this is a different block or if all uses happen
+            // after the store, do a simple linear scan to replace loads with
+            // the stored value.
+            for (BasicBlock::iterator I = UseBlock->begin(),E = UseBlock->end();
+                 I != E; ) {
+              if (LoadInst *LI = dyn_cast<LoadInst>(I++)) {
+                if (LI->getOperand(0) == AI) {
+                  LI->replaceAllUsesWith(OnlyStore->getOperand(0));
+                  if (AST && isa<PointerType>(LI->getType()))
+                    AST->deleteValue(LI);
+                  LI->eraseFromParent();
+                }
+              }
+            }
+            
+            // Finally, remove this block from the UsingBlock set.
+            UsingBlocks[i] = UsingBlocks.back();
+            --i; --e;
+          }
+
+      // Finally, after the scan, check to see if the store is all that is left.
+      if (UsingBlocks.empty()) {
+        // The alloca has been processed, move on.
+        Allocas[AllocaNum] = Allocas.back();
+        Allocas.pop_back();
+        --AllocaNum;
+        continue;
+      }
+    }
+    
+    
+    if (AST)
+      PointerAllocaValues[AllocaNum] = AllocaPointerVal;
+
+    // If we haven't computed a numbering for the BB's in the function, do so
+    // now.
+    if (BBNumbers.empty()) {
+      unsigned ID = 0;
+      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+        BBNumbers[I] = ID++;
+    }
+
+    // Compute the locations where PhiNodes need to be inserted.  Look at the
+    // dominance frontier of EACH basic-block we have a write in.
+    //
+    unsigned CurrentVersion = 0;
+    SmallPtrSet<PHINode*, 16> InsertedPHINodes;
+    std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks;
+    while (!DefiningBlocks.empty()) {
+      BasicBlock *BB = DefiningBlocks.back();
+      DefiningBlocks.pop_back();
+
+      // Look up the DF for this write, add it to PhiNodes
+      DominanceFrontier::const_iterator it = DF.find(BB);
+      if (it != DF.end()) {
+        const DominanceFrontier::DomSetType &S = it->second;
+
+        // In theory we don't need the indirection through the DFBlocks vector.
+        // In practice, the order of calling QueuePhiNode would depend on the
+        // (unspecified) ordering of basic blocks in the dominance frontier,
+        // which would give PHI nodes non-determinstic subscripts.  Fix this by
+        // processing blocks in order of the occurance in the function.
+        for (DominanceFrontier::DomSetType::const_iterator P = S.begin(),
+             PE = S.end(); P != PE; ++P)
+          DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P));
+
+        // Sort by which the block ordering in the function.
+        std::sort(DFBlocks.begin(), DFBlocks.end());
+
+        for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) {
+          BasicBlock *BB = DFBlocks[i].second;
+          if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes))
+            DefiningBlocks.push_back(BB);
+        }
+        DFBlocks.clear();
+      }
+    }
+
+    // Now that we have inserted PHI nodes along the Iterated Dominance Frontier
+    // of the writes to the variable, scan through the reads of the variable,
+    // marking PHI nodes which are actually necessary as alive (by removing them
+    // from the InsertedPHINodes set).  This is not perfect: there may PHI
+    // marked alive because of loads which are dominated by stores, but there
+    // will be no unmarked PHI nodes which are actually used.
+    //
+    for (unsigned i = 0, e = UsingBlocks.size(); i != e; ++i)
+      MarkDominatingPHILive(UsingBlocks[i], AllocaNum, InsertedPHINodes);
+    UsingBlocks.clear();
+
+    // If there are any PHI nodes which are now known to be dead, remove them!
+    for (SmallPtrSet<PHINode*, 16>::iterator I = InsertedPHINodes.begin(),
+           E = InsertedPHINodes.end(); I != E; ++I) {
+      PHINode *PN = *I;
+      bool Erased=NewPhiNodes.erase(std::make_pair(PN->getParent(), AllocaNum));
+      Erased=Erased;
+      assert(Erased && "PHI already removed?");
+      
+      if (AST && isa<PointerType>(PN->getType()))
+        AST->deleteValue(PN);
+      PN->eraseFromParent();
+      PhiToAllocaMap.erase(PN);
+    }
+
+    // Keep the reverse mapping of the 'Allocas' array.
+    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
+  }
+
+  // Process all allocas which are only used in a single basic block.
+  for (std::map<BasicBlock*, std::vector<AllocaInst*> >::iterator I =
+         LocallyUsedAllocas.begin(), E = LocallyUsedAllocas.end(); I != E; ++I){
+    const std::vector<AllocaInst*> &LocAllocas = I->second;
+    assert(!LocAllocas.empty() && "empty alloca list??");
+
+    // It's common for there to only be one alloca in the list.  Handle it
+    // efficiently.
+    if (LocAllocas.size() == 1) {
+      // If we can do the quick promotion pass, do so now.
+      if (PromoteLocallyUsedAlloca(I->first, LocAllocas[0]))
+        RetryList.push_back(LocAllocas[0]);  // Failed, retry later.
+    } else {
+      // Locally promote anything possible.  Note that if this is unable to
+      // promote a particular alloca, it puts the alloca onto the Allocas vector
+      // for global processing.
+      PromoteLocallyUsedAllocas(I->first, LocAllocas);
+    }
+  }
+
+  if (Allocas.empty())
+    return; // All of the allocas must have been trivial!
+
+  // Set the incoming values for the basic block to be null values for all of
+  // the alloca's.  We do this in case there is a load of a value that has not
+  // been stored yet.  In this case, it will get this null value.
+  //
+  std::vector<Value *> Values(Allocas.size());
+  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
+    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
+
+  // Walks all basic blocks in the function performing the SSA rename algorithm
+  // and inserting the phi nodes we marked as necessary
+  //
+  RenamePassWorkList.clear();
+  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+  while(!RenamePassWorkList.empty()) {
+    RenamePassData RPD = RenamePassWorkList.back(); 
+    RenamePassWorkList.pop_back();
+    // RenamePass may add new worklist entries.
+    RenamePass(RPD.BB, RPD.Pred, RPD.Values);
+  }
+  
+  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
+  Visited.clear();
+
+  // Remove the allocas themselves from the function.
+  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
+    Instruction *A = Allocas[i];
+
+    // If there are any uses of the alloca instructions left, they must be in
+    // sections of dead code that were not processed on the dominance frontier.
+    // Just delete the users now.
+    //
+    if (!A->use_empty())
+      A->replaceAllUsesWith(UndefValue::get(A->getType()));
+    if (AST) AST->deleteValue(A);
+    A->eraseFromParent();
+  }
+
+  
+  // Loop over all of the PHI nodes and see if there are any that we can get
+  // rid of because they merge all of the same incoming values.  This can
+  // happen due to undef values coming into the PHI nodes.  This process is
+  // iterative, because eliminating one PHI node can cause others to be removed.
+  bool EliminatedAPHI = true;
+  while (EliminatedAPHI) {
+    EliminatedAPHI = false;
+    
+    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
+      PHINode *PN = I->second;
+      
+      // If this PHI node merges one value and/or undefs, get the value.
+      if (Value *V = PN->hasConstantValue(true)) {
+        if (!isa<Instruction>(V) ||
+            properlyDominates(cast<Instruction>(V), PN)) {
+          if (AST && isa<PointerType>(PN->getType()))
+            AST->deleteValue(PN);
+          PN->replaceAllUsesWith(V);
+          PN->eraseFromParent();
+          NewPhiNodes.erase(I++);
+          EliminatedAPHI = true;
+          continue;
+        }
+      }
+      ++I;
+    }
+  }
+  
+  // At this point, the renamer has added entries to PHI nodes for all reachable
+  // code.  Unfortunately, there may be unreachable blocks which the renamer
+  // hasn't traversed.  If this is the case, the PHI nodes may not
+  // have incoming values for all predecessors.  Loop over all PHI nodes we have
+  // created, inserting undef values if they are missing any incoming values.
+  //
+  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
+         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
+    // We want to do this once per basic block.  As such, only process a block
+    // when we find the PHI that is the first entry in the block.
+    PHINode *SomePHI = I->second;
+    BasicBlock *BB = SomePHI->getParent();
+    if (&BB->front() != SomePHI)
+      continue;
+
+    // Count the number of preds for BB.
+    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+
+    // Only do work here if there the PHI nodes are missing incoming values.  We
+    // know that all PHI nodes that were inserted in a block will have the same
+    // number of incoming values, so we can just check any of them.
+    if (SomePHI->getNumIncomingValues() == Preds.size())
+      continue;
+    
+    // Ok, now we know that all of the PHI nodes are missing entries for some
+    // basic blocks.  Start by sorting the incoming predecessors for efficient
+    // access.
+    std::sort(Preds.begin(), Preds.end());
+    
+    // Now we loop through all BB's which have entries in SomePHI and remove
+    // them from the Preds list.
+    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
+      // Do a log(n) search of the Preds list for the entry we want.
+      SmallVector<BasicBlock*, 16>::iterator EntIt =
+        std::lower_bound(Preds.begin(), Preds.end(),
+                         SomePHI->getIncomingBlock(i));
+      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
+             "PHI node has entry for a block which is not a predecessor!");
+
+      // Remove the entry
+      Preds.erase(EntIt);
+    }
+
+    // At this point, the blocks left in the preds list must have dummy
+    // entries inserted into every PHI nodes for the block.  Update all the phi
+    // nodes in this block that we are inserting (there could be phis before
+    // mem2reg runs).
+    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
+    BasicBlock::iterator BBI = BB->begin();
+    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
+           SomePHI->getNumIncomingValues() == NumBadPreds) {
+      Value *UndefVal = UndefValue::get(SomePHI->getType());
+      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
+        SomePHI->addIncoming(UndefVal, Preds[pred]);
+    }
+  }
+        
+  NewPhiNodes.clear();
+}
+
+// MarkDominatingPHILive - Mem2Reg wants to construct "pruned" SSA form, not
+// "minimal" SSA form.  To do this, it inserts all of the PHI nodes on the IDF
+// as usual (inserting the PHI nodes in the DeadPHINodes set), then processes
+// each read of the variable.  For each block that reads the variable, this
+// function is called, which removes used PHI nodes from the DeadPHINodes set.
+// After all of the reads have been processed, any PHI nodes left in the
+// DeadPHINodes set are removed.
+//
+void PromoteMem2Reg::MarkDominatingPHILive(BasicBlock *BB, unsigned AllocaNum,
+                                      SmallPtrSet<PHINode*, 16> &DeadPHINodes) {
+  // Scan the immediate dominators of this block looking for a block which has a
+  // PHI node for Alloca num.  If we find it, mark the PHI node as being alive!
+  DomTreeNode *IDomNode = DT.getNode(BB);
+  for (DomTreeNode *IDom = IDomNode; IDom; IDom = IDom->getIDom()) {
+    BasicBlock *DomBB = IDom->getBlock();
+    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator
+      I = NewPhiNodes.find(std::make_pair(DomBB, AllocaNum));
+    if (I != NewPhiNodes.end()) {
+      // Ok, we found an inserted PHI node which dominates this value.
+      PHINode *DominatingPHI = I->second;
+
+      // Find out if we previously thought it was dead.  If so, mark it as being
+      // live by removing it from the DeadPHINodes set.
+      if (DeadPHINodes.erase(DominatingPHI)) {
+        // Now that we have marked the PHI node alive, also mark any PHI nodes
+        // which it might use as being alive as well.
+        for (pred_iterator PI = pred_begin(DomBB), PE = pred_end(DomBB);
+             PI != PE; ++PI)
+          MarkDominatingPHILive(*PI, AllocaNum, DeadPHINodes);
+      }
+    }
+  }
+}
+
/// PromoteLocallyUsedAlloca - Many allocas are only used within a single basic
/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return true.  This is necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths.  e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
bool PromoteMem2Reg::PromoteLocallyUsedAlloca(BasicBlock *BB, AllocaInst *AI) {
  assert(!AI->use_empty() && "There are no uses of the alloca!");

  // Handle degenerate cases quickly.
  if (AI->hasOneUse()) {
    Instruction *U = cast<Instruction>(AI->use_back());
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // The sole use is a load with no store anywhere, so it must be a load of
      // an uninitialized value; replace it with undef.
      LI->replaceAllUsesWith(UndefValue::get(AI->getAllocatedType()));
      // Keep the alias set tracker (if any) in sync for pointer-typed loads.
      if (AST && isa<PointerType>(LI->getType()))
        AST->deleteValue(LI);
    } else {
      // Otherwise it must be a store which is never read.
      assert(isa<StoreInst>(U));
    }
    // Either way the single user is now dead; remove it from the block.
    BB->getInstList().erase(U);
  } else {
    // Uses of the uninitialized memory location shall get undef.
    // CurVal tracks the most recently stored value; it stays null until the
    // first store to AI is seen.
    Value *CurVal = 0;

    // Single forward scan of the block: loads of AI are replaced with the
    // running "current value", stores to AI update it.  The iterator is
    // advanced *before* any instruction is erased so deletion cannot
    // invalidate it.
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
      Instruction *Inst = I++;
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        if (LI->getOperand(0) == AI) {
          if (!CurVal) return true;  // Could not locally promote!

          // Loads just returns the "current value"...
          LI->replaceAllUsesWith(CurVal);
          if (AST && isa<PointerType>(LI->getType()))
            AST->deleteValue(LI);
          BB->getInstList().erase(LI);
        }
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        if (SI->getOperand(1) == AI) {
          // Store updates the "current value"...
          CurVal = SI->getOperand(0);
          BB->getInstList().erase(SI);
        }
      }
    }
  }

  // After traversing the basic block, there should be no more uses of the
  // alloca, remove it now.
  assert(AI->use_empty() && "Uses of alloca from more than one BB??");
  if (AST) AST->deleteValue(AI);
  AI->getParent()->getInstList().erase(AI);
  return false;
}
+
/// PromoteLocallyUsedAllocas - This method is just like
/// PromoteLocallyUsedAlloca, except that it processes multiple alloca
/// instructions in parallel.  This is important in cases where we have large
/// basic blocks, as we don't want to rescan the entire basic block for each
/// alloca which is locally used in it (which might be a lot).
///
/// Allocas which are loaded before any store is seen are not promoted here;
/// they are pushed onto RetryList so the inter-block algorithm can handle
/// them (control flow may make such a load legitimate).
void PromoteMem2Reg::
PromoteLocallyUsedAllocas(BasicBlock *BB, const std::vector<AllocaInst*> &AIs) {
  // Map each tracked alloca to the value most recently stored into it.  A
  // null value means no store has been seen yet.
  std::map<AllocaInst*, Value*> CurValues;
  for (unsigned i = 0, e = AIs.size(); i != e; ++i)
    CurValues[AIs[i]] = 0; // Insert with null value

  // One linear scan of the block serves every alloca in CurValues.  The
  // iterator is advanced before any erase so deletion cannot invalidate it.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Inst = I++;
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // Is this a load of an alloca we are tracking?
      if (AllocaInst *AI = dyn_cast<AllocaInst>(LI->getOperand(0))) {
        std::map<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI);
        if (AIt != CurValues.end()) {
          // If loading an uninitialized value, allow the inter-block case to
          // handle it.  Due to control flow, this might actually be ok.
          if (AIt->second == 0) {  // Use of locally uninitialized value??
            RetryList.push_back(AI);   // Retry elsewhere.
            CurValues.erase(AIt);   // Stop tracking this here.
            if (CurValues.empty()) return;
          } else {
            // Loads just returns the "current value"...
            LI->replaceAllUsesWith(AIt->second);
            if (AST && isa<PointerType>(LI->getType()))
              AST->deleteValue(LI);
            BB->getInstList().erase(LI);
          }
        }
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (AllocaInst *AI = dyn_cast<AllocaInst>(SI->getOperand(1))) {
        std::map<AllocaInst*, Value*>::iterator AIt = CurValues.find(AI);
        if (AIt != CurValues.end()) {
          // Store updates the "current value"...
          AIt->second = SI->getOperand(0);
          BB->getInstList().erase(SI);
        }
      }
    }
  }
}
+
+
+
+// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
+// Alloca returns true if there wasn't already a phi-node for that variable
+//
+bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
+                                  unsigned &Version,
+                                  SmallPtrSet<PHINode*, 16> &InsertedPHINodes) {
+  // Look up the basic-block in question.
+  PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
+
+  // If the BB already has a phi node added for the i'th alloca then we're done!
+  if (PN) return false;
+
+  // Create a PhiNode using the dereferenced type... and add the phi-node to the
+  // BasicBlock.
+  PN = new PHINode(Allocas[AllocaNo]->getAllocatedType(),
+                   Allocas[AllocaNo]->getName() + "." +
+                   utostr(Version++), BB->begin());
+  PhiToAllocaMap[PN] = AllocaNo;
+  
+  InsertedPHINodes.insert(PN);
+
+  if (AST && isa<PointerType>(PN->getType()))
+    AST->copyValue(PointerAllocaValues[AllocaNo], PN);
+
+  return true;
+}
+
+
// RenamePass - Traverse the CFG of the function, renaming loads and
// stores to the allocas which we are promoting.  IncomingVals indicates what
// value each Alloca contains on exit from the predecessor block Pred.
// Successors are not recursed into directly; they are pushed onto
// RenamePassWorkList for the caller's driver loop to process.
//
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                std::vector<Value*> &IncomingVals) {
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // Pred may have multiple edges to BB.  If so, we want to add N incoming
    // values to each PHI we are inserting on the first time we see the edge.
    // Check to see if APN already has incoming values from Pred.  This also
    // prevents us from modifying PHI nodes that are not currently being
    // inserted (e.g. PHIs that existed before mem2reg ran).
    bool HasPredEntries = false;
    for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) {
      if (APN->getIncomingBlock(i) == Pred) {
        HasPredEntries = true;
        break;
      }
    }
    
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB (e.g. a switch may target BB from several cases).
    if (!HasPredEntries) {
      TerminatorInst *PredTerm = Pred->getTerminator();
      unsigned NumEdges = 0;
      for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
        if (PredTerm->getSuccessor(i) == BB)
          ++NumEdges;
      }
      assert(NumEdges && "Must be at least one edge from Pred to BB!");
      
      // Add entries for all the phis.  Walk the PHI run at the top of the
      // block, stopping at the first non-PHI or the first PHI that already
      // has an entry for Pred (which means it wasn't inserted by us).
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];
        
        // Add N incoming values to the PHI node, one per edge from Pred.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);
        
        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;
        
        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;
        
        // Verify it doesn't already have entries for Pred.  If it does, it is
        // not being inserted by this mem2reg invocation.
        HasPredEntries = false;
        for (unsigned i = 0, e = APN->getNumIncomingValues(); i != e; ++i) {
          if (APN->getIncomingBlock(i) == Pred) {
            HasPredEntries = true;
            break;
          }
        }
      } while (!HasPredEntries);
    }
  }
  
  // Don't revisit blocks.  (insert returns false if BB was already in the
  // Visited set.)  PHI entries above must still be added for each distinct
  // predecessor edge, which is why this check comes after the PHI update.
  if (!Visited.insert(BB)) return;

  // Scan the block, rewriting loads/stores of promoted allocas.  The iterator
  // is advanced before any erase so deletion cannot invalidate it.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      if (AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
        std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
        if (AI != AllocaLookup.end()) {
          Value *V = IncomingVals[AI->second];

          // walk the use list of this load and replace all uses with r
          LI->replaceAllUsesWith(V);
          if (AST && isa<PointerType>(LI->getType()))
            AST->deleteValue(LI);
          BB->getInstList().erase(LI);
        }
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of the
      // value
      if (AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
        std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
        if (ai != AllocaLookup.end()) {
          // what value were we writing?
          IncomingVals[ai->second] = SI->getOperand(0);
          BB->getInstList().erase(SI);
        }
      }
    }
  }

  // Queue our successors for processing; each entry snapshots the current
  // IncomingVals as the values flowing along the BB->successor edge.
  TerminatorInst *TI = BB->getTerminator();
  for (unsigned i = 0; i != TI->getNumSuccessors(); i++)
    RenamePassWorkList.push_back(RenamePassData(TI->getSuccessor(i), BB, IncomingVals));
}
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate.  This function makes
+/// use of DominanceFrontier information.  This function does not modify the CFG
+/// of the function at all.  All allocas must be from the same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
+void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+                           DominatorTree &DT, DominanceFrontier &DF,
+                           AliasSetTracker *AST) {
+  // If there is nothing to do, bail out...
+  if (Allocas.empty()) return;
+
+  SmallVector<AllocaInst*, 16> RetryList;
+  PromoteMem2Reg(Allocas, RetryList, DT, DF, AST).run();
+
+  // PromoteMem2Reg may not have been able to promote all of the allocas in one
+  // pass, run it again if needed.
+  std::vector<AllocaInst*> NewAllocas;
+  while (!RetryList.empty()) {
+    // If we need to retry some allocas, this is due to there being no store
+    // before a read in a local block.  To counteract this, insert a store of
+    // undef into the alloca right after the alloca itself.
+    for (unsigned i = 0, e = RetryList.size(); i != e; ++i) {
+      BasicBlock::iterator BBI = RetryList[i];
+
+      new StoreInst(UndefValue::get(RetryList[i]->getAllocatedType()),
+                    RetryList[i], ++BBI);
+    }
+
+    NewAllocas.assign(RetryList.begin(), RetryList.end());
+    RetryList.clear();
+    PromoteMem2Reg(NewAllocas, RetryList, DT, DF, AST).run();
+    NewAllocas.clear();
+  }
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
new file mode 100644
index 0000000..6c34d02
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -0,0 +1,1905 @@
+//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Peephole optimize the CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "simplifycfg"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+#include <functional>
+#include <set>
+#include <map>
+using namespace llvm;
+
+/// SafeToMergeTerminators - Return true if it is safe to merge these two
+/// terminator instructions together.
+///
+static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+  if (SI1 == SI2) return false;  // Can't merge with self!
+  
+  // It is not safe to merge these two switch instructions if they have a common
+  // successor, and if that successor has a PHI node, and if *that* PHI node has
+  // conflicting incoming values from the two switch blocks.
+  BasicBlock *SI1BB = SI1->getParent();
+  BasicBlock *SI2BB = SI2->getParent();
+  SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+  
+  for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I)
+    if (SI1Succs.count(*I))
+      for (BasicBlock::iterator BBI = (*I)->begin();
+           isa<PHINode>(BBI); ++BBI) {
+        PHINode *PN = cast<PHINode>(BBI);
+        if (PN->getIncomingValueForBlock(SI1BB) !=
+            PN->getIncomingValueForBlock(SI2BB))
+          return false;
+      }
+        
+  return true;
+}
+
+/// AddPredecessorToBlock - Update PHI nodes in Succ to indicate that there will
+/// now be entries in it from the 'NewPred' block.  The values that will be
+/// flowing into the PHI nodes will be the same as those coming in from
+/// ExistPred, an existing predecessor of Succ.
+static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
+                                  BasicBlock *ExistPred) {
+  assert(std::find(succ_begin(ExistPred), succ_end(ExistPred), Succ) !=
+         succ_end(ExistPred) && "ExistPred is not a predecessor of Succ!");
+  if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do
+  
+  for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    Value *V = PN->getIncomingValueForBlock(ExistPred);
+    PN->addIncoming(V, NewPred);
+  }
+}
+
+// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+// almost-empty BB ending in an unconditional branch to Succ, into succ.
+//
+// Assumption: Succ is the single successor for BB.
+//
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+  assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+  // Check to see if one of the predecessors of BB is already a predecessor of
+  // Succ.  If so, we cannot do the transformation if there are any PHI nodes
+  // with incompatible values coming in from the two edges!
+  //
+  if (isa<PHINode>(Succ->front())) {
+    SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+    for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+         PI != PE; ++PI)
+      if (BBPreds.count(*PI)) {
+        // Loop over all of the PHI nodes checking to see if there are
+        // incompatible values coming in.
+        for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+          PHINode *PN = cast<PHINode>(I);
+          // Loop up the entries in the PHI node for BB and for *PI if the
+          // values coming in are non-equal, we cannot merge these two blocks
+          // (instead we should insert a conditional move or something, then
+          // merge the blocks).
+          if (PN->getIncomingValueForBlock(BB) !=
+              PN->getIncomingValueForBlock(*PI))
+            return false;  // Values are not equal...
+        }
+      }
+  }
+    
+  // Finally, if BB has PHI nodes that are used by things other than the PHIs in
+  // Succ and Succ has predecessors that are not Succ and not Pred, we cannot
+  // fold these blocks, as we don't know whether BB dominates Succ or not to
+  // update the PHI nodes correctly.
+  if (!isa<PHINode>(BB->begin()) || Succ->getSinglePredecessor()) return true;
+
+  // If the predecessors of Succ are only BB and Succ itself, handle it.
+  bool IsSafe = true;
+  for (pred_iterator PI = pred_begin(Succ), E = pred_end(Succ); PI != E; ++PI)
+    if (*PI != Succ && *PI != BB) {
+      IsSafe = false;
+      break;
+    }
+  if (IsSafe) return true;
+  
+  // If the PHI nodes in BB are only used by instructions in Succ, we are ok if
+  // BB and Succ have no common predecessors.
+  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E;
+         ++UI)
+      if (cast<Instruction>(*UI)->getParent() != Succ)
+        return false;
+  }
+  
+  // Scan the predecessor sets of BB and Succ, making sure there are no common
+  // predecessors.  Common predecessors would cause us to build a phi node with
+  // differing incoming values, which is not legal.
+  SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+  for (pred_iterator PI = pred_begin(Succ), E = pred_end(Succ); PI != E; ++PI)
+    if (BBPreds.count(*PI))
+      return false;
+    
+  return true;
+}
+
/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional
/// branch to Succ, and contains no instructions other than PHI nodes and the
/// branch.  If possible, eliminate BB by rerouting all of its predecessors
/// directly to Succ.  Returns true if BB was removed.
static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
                                                    BasicBlock *Succ) {
  // If our successor has PHI nodes, then we need to update them to include
  // entries for BB's predecessors, not for BB itself.  If the PHI-rewriting
  // check fails (returns false), the fold is illegal and we must bail out
  // without touching anything.
  //
  if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
  
  DOUT << "Killing Trivial BB: \n" << *BB;
  
  if (isa<PHINode>(Succ->begin())) {
    // If there is more than one pred of succ, and there are PHI nodes in
    // the successor, then we need to add incoming edges for the PHI nodes
    //
    const std::vector<BasicBlock*> BBPreds(pred_begin(BB), pred_end(BB));
    
    // Loop over all of the PHI nodes in the successor of BB.
    for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
      PHINode *PN = cast<PHINode>(I);
      // Drop the entry for BB itself; the incoming value will be rethreaded
      // through BB's predecessors below.
      Value *OldVal = PN->removeIncomingValue(BB, false);
      assert(OldVal && "No entry in PHI for Pred BB!");
      
      // If this incoming value is one of the PHI nodes in BB, the new entries
      // in the PHI node are the entries from the old PHI.
      if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
        PHINode *OldValPN = cast<PHINode>(OldVal);
        for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(OldValPN->getIncomingValue(i),
                          OldValPN->getIncomingBlock(i));
      } else {
        // Otherwise the value flowed through BB unchanged: each of BB's
        // predecessors contributes the same OldVal.
        for (std::vector<BasicBlock*>::const_iterator PredI = BBPreds.begin(),
             End = BBPreds.end(); PredI != End; ++PredI) {
          // Add an incoming value for each of the new incoming values...
          PN->addIncoming(OldVal, *PredI);
        }
      }
    }
  }
  
  if (isa<PHINode>(&BB->front())) {
    // Snapshot Succ's predecessor list before we start splicing PHI nodes in,
    // so new edges can be accounted for below.
    std::vector<BasicBlock*>
    OldSuccPreds(pred_begin(Succ), pred_end(Succ));
    
    // Move all PHI nodes in BB to Succ if they are alive, otherwise
    // delete them.
    while (PHINode *PN = dyn_cast<PHINode>(&BB->front()))
      if (PN->use_empty()) {
        // Just remove the dead phi.  This happens if Succ's PHIs were the only
        // users of the PHI nodes.
        PN->eraseFromParent();
      } else {
        // The instruction is alive, so this means that Succ must have
        // *ONLY* had BB as a predecessor, and the PHI node is still valid
        // now.  Simply move it into Succ, because we know that BB
        // strictly dominated Succ.
        Succ->getInstList().splice(Succ->begin(),
                                   BB->getInstList(), BB->begin());
        
        // We need to add new entries for the PHI node to account for
        // predecessors of Succ that the PHI node does not take into
        // account.  At this point, since we know that BB dominated succ,
        // any newly added incoming edges should use the PHI node itself as
        // the value for these edges, because they are loop back edges.
        for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i)
          if (OldSuccPreds[i] != BB)
            PN->addIncoming(PN, OldSuccPreds[i]);
      }
  }
    
  // Everything that jumped to BB now goes to Succ.
  BB->replaceAllUsesWith(Succ);
  if (!Succ->hasName()) Succ->takeName(BB);
  BB->eraseFromParent();              // Delete the old basic block.
  return true;
}
+
+/// GetIfCondition - Given a basic block (BB) with two predecessors (and
+/// presumably PHI nodes in it), check to see if the merge at this block is due
+/// to an "if condition".  If so, return the boolean condition that determines
+/// which entry into BB will be taken.  Also, return by references the block
+/// that will be entered from if the condition is true, and the block that will
+/// be entered if the condition is false.
+///
+///
+static Value *GetIfCondition(BasicBlock *BB,
+                             BasicBlock *&IfTrue, BasicBlock *&IfFalse) {
+  assert(std::distance(pred_begin(BB), pred_end(BB)) == 2 &&
+         "Function can only handle blocks with 2 predecessors!");
+  BasicBlock *Pred1 = *pred_begin(BB);
+  BasicBlock *Pred2 = *++pred_begin(BB);
+
+  // We can only handle branches.  Other control flow will be lowered to
+  // branches if possible anyway.
+  if (!isa<BranchInst>(Pred1->getTerminator()) ||
+      !isa<BranchInst>(Pred2->getTerminator()))
+    return 0;
+  BranchInst *Pred1Br = cast<BranchInst>(Pred1->getTerminator());
+  BranchInst *Pred2Br = cast<BranchInst>(Pred2->getTerminator());
+
+  // Eliminate code duplication by ensuring that Pred1Br is conditional if
+  // either are.
+  if (Pred2Br->isConditional()) {
+    // If both branches are conditional, we don't have an "if statement".  In
+    // reality, we could transform this case, but since the condition will be
+    // required anyway, we stand no chance of eliminating it, so the xform is
+    // probably not profitable.
+    if (Pred1Br->isConditional())
+      return 0;
+
+    std::swap(Pred1, Pred2);
+    std::swap(Pred1Br, Pred2Br);
+  }
+
+  if (Pred1Br->isConditional()) {
+    // If we found a conditional branch predecessor, make sure that it branches
+    // to BB and Pred2Br.  If it doesn't, this isn't an "if statement".
+    if (Pred1Br->getSuccessor(0) == BB &&
+        Pred1Br->getSuccessor(1) == Pred2) {
+      IfTrue = Pred1;
+      IfFalse = Pred2;
+    } else if (Pred1Br->getSuccessor(0) == Pred2 &&
+               Pred1Br->getSuccessor(1) == BB) {
+      IfTrue = Pred2;
+      IfFalse = Pred1;
+    } else {
+      // We know that one arm of the conditional goes to BB, so the other must
+      // go somewhere unrelated, and this must not be an "if statement".
+      return 0;
+    }
+
+    // The only thing we have to watch out for here is to make sure that Pred2
+    // doesn't have incoming edges from other blocks.  If it does, the condition
+    // doesn't dominate BB.
+    if (++pred_begin(Pred2) != pred_end(Pred2))
+      return 0;
+
+    return Pred1Br->getCondition();
+  }
+
+  // Ok, if we got here, both predecessors end with an unconditional branch to
+  // BB.  Don't panic!  If both blocks only have a single (identical)
+  // predecessor, and THAT is a conditional branch, then we're all ok!
+  if (pred_begin(Pred1) == pred_end(Pred1) ||
+      ++pred_begin(Pred1) != pred_end(Pred1) ||
+      pred_begin(Pred2) == pred_end(Pred2) ||
+      ++pred_begin(Pred2) != pred_end(Pred2) ||
+      *pred_begin(Pred1) != *pred_begin(Pred2))
+    return 0;
+
+  // Otherwise, if this is a conditional branch, then we can use it!
+  BasicBlock *CommonPred = *pred_begin(Pred1);
+  if (BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator())) {
+    assert(BI->isConditional() && "Two successors but not conditional?");
+    if (BI->getSuccessor(0) == Pred1) {
+      IfTrue = Pred1;
+      IfFalse = Pred2;
+    } else {
+      IfTrue = Pred2;
+      IfFalse = Pred1;
+    }
+    return BI->getCondition();
+  }
+  return 0;
+}
+
+
+// If we have a merge point of an "if condition" as accepted above, return true
+// if the specified value dominates the block.  We don't handle the true
+// generality of domination here, just a special case which works well enough
+// for us.
+//
+// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
+// see if V (which must be an instruction) is cheap to compute and is
+// non-trapping.  If both are true, the instruction is inserted into the set and
+// true is returned.
+static bool DominatesMergePoint(Value *V, BasicBlock *BB,
+                                std::set<Instruction*> *AggressiveInsts) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) {
+    // Non-instructions all dominate instructions, but not all constantexprs
+    // can be executed unconditionally.
+    if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
+      if (C->canTrap())
+        return false;
+    return true;
+  }
+  BasicBlock *PBB = I->getParent();
+
+  // We don't want to allow weird loops that might have the "if condition" in
+  // the bottom of this block.
+  if (PBB == BB) return false;
+
+  // If this instruction is defined in a block that contains an unconditional
+  // branch to BB, then it must be in the 'conditional' part of the "if
+  // statement".
+  if (BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator()))
+    if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
+      // With no AggressiveInsts set, we are not allowed to speculate at all.
+      if (!AggressiveInsts) return false;
+      // Okay, it looks like the instruction IS in the "condition".  Check to
+      // see if its a cheap instruction to unconditionally compute, and if it
+      // only uses stuff defined outside of the condition.  If so, hoist it out.
+      switch (I->getOpcode()) {
+      default: return false;  // Cannot hoist this out safely.
+      case Instruction::Load:
+        // We can hoist loads that are non-volatile and obviously cannot trap.
+        if (cast<LoadInst>(I)->isVolatile())
+          return false;
+        // Only pointers that are allocas or constants are accepted as
+        // known-not-to-trap when dereferenced unconditionally.
+        if (!isa<AllocaInst>(I->getOperand(0)) &&
+            !isa<Constant>(I->getOperand(0)))
+          return false;
+
+        // Finally, we have to check to make sure there are no instructions
+        // before the load in its basic block, as we are going to hoist the
+        // load out to its predecessor.
+        if (PBB->begin() != BasicBlock::iterator(I))
+          return false;
+        break;
+      case Instruction::Add:
+      case Instruction::Sub:
+      case Instruction::And:
+      case Instruction::Or:
+      case Instruction::Xor:
+      case Instruction::Shl:
+      case Instruction::LShr:
+      case Instruction::AShr:
+      case Instruction::ICmp:
+      case Instruction::FCmp:
+        break;   // These are all cheap and non-trapping instructions.
+      }
+
+      // Okay, we can only really hoist these out if their operands are not
+      // defined in the conditional region.  Note the recursive call passes a
+      // null set, so operands themselves are never speculated.
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+        if (!DominatesMergePoint(I->getOperand(i), BB, 0))
+          return false;
+      // Okay, it's safe to do this!  Remember this instruction.
+      AggressiveInsts->insert(I);
+    }
+
+  // Any other defining block is treated as dominating BB -- this is the
+  // limited special case described in the header comment above.
+  return true;
+}
+
+/// GatherConstantSetEQs - Given a potentially 'or'd together collection of
+/// icmp_eq instructions that compare a value against constants, return the
+/// value being compared, appending each constant to the Values vector.
+/// Returns null if the tree does not match this pattern.
+static Value *GatherConstantSetEQs(Value *V, std::vector<ConstantInt*> &Values){
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return 0;
+
+  if (Inst->getOpcode() == Instruction::ICmp &&
+      cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_EQ) {
+    // One side of the equality must be a constant int; return the other side.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Inst->getOperand(1))) {
+      Values.push_back(CI);
+      return Inst->getOperand(0);
+    }
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+      Values.push_back(CI);
+      return Inst->getOperand(1);
+    }
+    return 0;
+  }
+
+  if (Inst->getOpcode() == Instruction::Or) {
+    // Both operands of the 'or' must match, and must compare the same value.
+    if (Value *LHS = GatherConstantSetEQs(Inst->getOperand(0), Values))
+      if (Value *RHS = GatherConstantSetEQs(Inst->getOperand(1), Values))
+        if (LHS == RHS)
+          return LHS;
+  }
+  return 0;
+}
+
+/// GatherConstantSetNEs - Given a potentially 'and'd together collection of
+/// icmp_ne instructions that compare a value against constants, return the
+/// value being compared, appending each constant to the Values vector.
+/// Returns null if the tree does not match this pattern.
+static Value *GatherConstantSetNEs(Value *V, std::vector<ConstantInt*> &Values){
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return 0;
+
+  if (Inst->getOpcode() == Instruction::ICmp &&
+      cast<ICmpInst>(Inst)->getPredicate() == ICmpInst::ICMP_NE) {
+    // One side of the inequality must be a constant int; return the other.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Inst->getOperand(1))) {
+      Values.push_back(CI);
+      return Inst->getOperand(0);
+    }
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
+      Values.push_back(CI);
+      return Inst->getOperand(1);
+    }
+    return 0;
+  }
+
+  if (Inst->getOpcode() == Instruction::And) {
+    // Both operands of the 'and' must match, and must compare the same value.
+    if (Value *LHS = GatherConstantSetNEs(Inst->getOperand(0), Values))
+      if (Value *RHS = GatherConstantSetNEs(Inst->getOperand(1), Values))
+        if (LHS == RHS)
+          return LHS;
+  }
+  return 0;
+}
+
+
+
+/// GatherValueComparisons - If the specified Cond is an 'and' or 'or' of a
+/// bunch of comparisons of one value against constants, return the value and
+/// the constants being compared.  The boolean result gives the polarity:
+/// true means the condition is true when CompVal equals one of the constants;
+/// false means it is false when CompVal equals one of them.
+static bool GatherValueComparisons(Instruction *Cond, Value *&CompVal,
+                                   std::vector<ConstantInt*> &Values) {
+  switch (Cond->getOpcode()) {
+  case Instruction::Or:
+    // Disjunction of eq-comparisons: condition holds when CompVal matches.
+    CompVal = GatherConstantSetEQs(Cond, Values);
+    return true;
+  case Instruction::And:
+    // Conjunction of ne-comparisons: condition fails when CompVal matches.
+    CompVal = GatherConstantSetNEs(Cond, Values);
+    return false;
+  default:
+    // Not a recognized form; CompVal is left untouched.
+    return false;
+  }
+}
+
+/// ErasePossiblyDeadInstructionTree - If the specified instruction is dead and
+/// has no side effects, nuke it.  If it uses any instructions that become dead
+/// because the instruction is now gone, nuke them too.
+static void ErasePossiblyDeadInstructionTree(Instruction *I) {
+  if (!isInstructionTriviallyDead(I)) return;
+
+  // Worklist of instructions whose deadness remains to be examined.
+  std::vector<Instruction*> Worklist(1, I);
+
+  while (!Worklist.empty()) {
+    Instruction *Dead = Worklist.back();
+    Worklist.pop_back();
+
+    // The instruction may no longer be trivially dead by now; re-check.
+    if (!isInstructionTriviallyDead(Dead)) continue;
+
+    // Purge any duplicate copies of this instruction from the worklist so
+    // that we never visit it again after erasing it below.
+    for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx)
+      if (Worklist[Idx] == Dead) {
+        Worklist.erase(Worklist.begin()+Idx);
+        --Idx;
+      }
+
+    // Erasing this instruction may make its operands dead; queue them up.
+    for (unsigned Op = 0, NumOps = Dead->getNumOperands(); Op != NumOps; ++Op)
+      if (Instruction *OpInst = dyn_cast<Instruction>(Dead->getOperand(Op)))
+        Worklist.push_back(OpInst);
+
+    // Remove the dead instruction itself.
+    Dead->eraseFromParent();
+  }
+}
+
+/// isValueEqualityComparison - If the specified terminator checks a value for
+/// equality against constant integers (a switch, or a conditional branch on
+/// an icmp eq/ne with a constant), return the value being tested, else null.
+static Value *isValueEqualityComparison(TerminatorInst *TI) {
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    // Do not permit merging of large switch instructions into their
+    // predecessors unless there is only one predecessor.
+    unsigned NumPreds = std::distance(pred_begin(SI->getParent()),
+                                      pred_end(SI->getParent()));
+    if (SI->getNumSuccessors() * NumPreds > 128)
+      return 0;
+    return SI->getCondition();
+  }
+
+  BranchInst *BI = dyn_cast<BranchInst>(TI);
+  if (!BI || !BI->isConditional() || !BI->getCondition()->hasOneUse())
+    return 0;
+
+  // The branch condition must be an eq/ne icmp against a constant integer.
+  ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (ICI &&
+      (ICI->getPredicate() == ICmpInst::ICMP_EQ ||
+       ICI->getPredicate() == ICmpInst::ICMP_NE) &&
+      isa<ConstantInt>(ICI->getOperand(1)))
+    return ICI->getOperand(0);
+  return 0;
+}
+
+// Given a value comparison instruction, decode all of the 'cases' that it
+// represents and return the 'default' block.  Callers establish via
+// isValueEqualityComparison that TI is either a SwitchInst or a conditional
+// branch on an icmp eq/ne against a ConstantInt.
+static BasicBlock *
+GetValueEqualityComparisonCases(TerminatorInst *TI,
+                                std::vector<std::pair<ConstantInt*,
+                                                      BasicBlock*> > &Cases) {
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+    Cases.reserve(SI->getNumCases());
+    // Start at 1: slot #0 of the SwitchInst holds the default destination,
+    // which is returned separately below rather than listed as a case.
+    for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+      Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+    return SI->getDefaultDest();
+  }
+
+  // Otherwise this is a conditional branch on icmp eq/ne.  The 'equal'
+  // successor becomes the single case; the other edge acts as the default.
+  // For ICMP_EQ equality takes the true edge (successor 0); for ICMP_NE it
+  // takes the false edge (successor 1) -- the bool-to-int conversion below
+  // selects the right index in each direction.
+  BranchInst *BI = cast<BranchInst>(TI);
+  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
+  Cases.push_back(std::make_pair(cast<ConstantInt>(ICI->getOperand(1)),
+                                 BI->getSuccessor(ICI->getPredicate() ==
+                                                  ICmpInst::ICMP_NE)));
+  return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
+}
+
+
+/// EliminateBlockCases - Given a vector of bb/value pairs, remove any entries
+/// in the list whose destination block matches BB.
+static void EliminateBlockCases(BasicBlock *BB,
+               std::vector<std::pair<ConstantInt*, BasicBlock*> > &Cases) {
+  // Walk backwards so that erasing an entry never disturbs the indices of
+  // entries we have yet to visit.
+  for (unsigned i = Cases.size(); i != 0; --i)
+    if (Cases[i-1].second == BB)
+      Cases.erase(Cases.begin()+i-1);
+}
+
+// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
+// well.  Keys are compared by pointer identity (presumably safe because
+// ConstantInts are uniqued -- TODO confirm against the Constant uniquing
+// tables).  Note: the general path below sorts C1 and C2 in place.
+static bool
+ValuesOverlap(std::vector<std::pair<ConstantInt*, BasicBlock*> > &C1,
+              std::vector<std::pair<ConstantInt*, BasicBlock*> > &C2) {
+  std::vector<std::pair<ConstantInt*, BasicBlock*> > *V1 = &C1, *V2 = &C2;
+
+  // Make V1 be smaller than V2.
+  if (V1->size() > V2->size())
+    std::swap(V1, V2);
+
+  if (V1->size() == 0) return false;
+  if (V1->size() == 1) {
+    // Just scan V2.
+    ConstantInt *TheVal = (*V1)[0].first;
+    for (unsigned i = 0, e = V2->size(); i != e; ++i)
+      if (TheVal == (*V2)[i].first)
+        return true;
+    // The single-element fast path found no match; don't fall through to the
+    // sort-and-merge below, which would just redo the same work (and
+    // needlessly reorder the caller's vectors).
+    return false;
+  }
+
+  // Otherwise, just sort both lists and compare element by element.
+  std::sort(V1->begin(), V1->end());
+  std::sort(V2->begin(), V2->end());
+  // Linear merge scan over the two sorted lists looking for a common key.
+  unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
+  while (i1 != e1 && i2 != e2) {
+    if ((*V1)[i1].first == (*V2)[i2].first)
+      return true;
+    if ((*V1)[i1].first < (*V2)[i2].first)
+      ++i1;
+    else
+      ++i2;
+  }
+  return false;
+}
+
+// SimplifyEqualityComparisonWithOnlyPredecessor - If TI is known to be a
+// terminator instruction and its block is known to only have a single
+// predecessor block, check to see if that predecessor is also a value
+// comparison with the same value, and if that comparison determines the outcome
+// of this comparison.  If so, simplify TI.  This does a very limited form of
+// jump threading.
+static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
+                                                          BasicBlock *Pred) {
+  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
+  if (!PredVal) return false;  // Not a value comparison in predecessor.
+
+  Value *ThisVal = isValueEqualityComparison(TI);
+  assert(ThisVal && "This isn't a value comparison!!");
+  if (ThisVal != PredVal) return false;  // Different predicates.
+
+  // Find out information about when control will move from Pred to TI's block.
+  std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+  BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(),
+                                                        PredCases);
+  EliminateBlockCases(PredDef, PredCases);  // Remove default from cases.
+
+  // Find information about how control leaves this block.
+  std::vector<std::pair<ConstantInt*, BasicBlock*> > ThisCases;
+  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
+  EliminateBlockCases(ThisDef, ThisCases);  // Remove default from cases.
+
+  // If TI's block is the default block from Pred's comparison, potentially
+  // simplify TI based on this knowledge.
+  if (PredDef == TI->getParent()) {
+    // If we are here, we know that the value is none of those cases listed in
+    // PredCases.  If there are any cases in ThisCases that are in PredCases, we
+    // can simplify TI.
+    if (ValuesOverlap(PredCases, ThisCases)) {
+      if (BranchInst *BTI = dyn_cast<BranchInst>(TI)) {
+        // Okay, one of the successors of this condbr is dead.  Convert it to a
+        // uncond br.
+        assert(ThisCases.size() == 1 && "Branch can only have one case!");
+        // Remember the condition so it can be deleted if it becomes dead.
+        Value *Cond = BTI->getCondition();
+        // Insert the new branch.
+        Instruction *NI = new BranchInst(ThisDef, TI);
+
+        // Remove PHI node entries for the dead edge.
+        ThisCases[0].second->removePredecessor(TI->getParent());
+
+        DOUT << "Threading pred instr: " << *Pred->getTerminator()
+             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+
+        TI->eraseFromParent();   // Nuke the old one.
+        // If condition is now dead, nuke it.
+        if (Instruction *CondI = dyn_cast<Instruction>(Cond))
+          ErasePossiblyDeadInstructionTree(CondI);
+        return true;
+
+      } else {
+        SwitchInst *SI = cast<SwitchInst>(TI);
+        // Okay, TI has cases that are statically dead, prune them away.
+        SmallPtrSet<Constant*, 16> DeadCases;
+        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+          DeadCases.insert(PredCases[i].first);
+
+        DOUT << "Threading pred instr: " << *Pred->getTerminator()
+             << "Through successor TI: " << *TI;
+
+        // Walk the cases backwards so that removeCase's index shuffling does
+        // not skip entries.  Slot #0 holds the default destination, so the
+        // loop deliberately stops before reaching it.
+        for (unsigned i = SI->getNumCases()-1; i != 0; --i)
+          if (DeadCases.count(SI->getCaseValue(i))) {
+            SI->getSuccessor(i)->removePredecessor(TI->getParent());
+            SI->removeCase(i);
+          }
+
+        DOUT << "Leaving: " << *TI << "\n";
+        return true;
+      }
+    }
+
+  } else {
+    // Otherwise, TI's block must correspond to some matched value.  Find out
+    // which value (or set of values) this is.
+    ConstantInt *TIV = 0;
+    BasicBlock *TIBB = TI->getParent();
+    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+      if (PredCases[i].second == TIBB)
+        if (TIV == 0)
+          TIV = PredCases[i].first;
+        else
+          return false;  // Cannot handle multiple values coming to this block.
+    assert(TIV && "No edge from pred to succ?");
+
+    // Okay, we found the one constant that our value can be if we get into TI's
+    // BB.  Find out which successor will unconditionally be branched to.
+    BasicBlock *TheRealDest = 0;
+    for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
+      if (ThisCases[i].first == TIV) {
+        TheRealDest = ThisCases[i].second;
+        break;
+      }
+
+    // If not handled by any explicit cases, it is handled by the default case.
+    if (TheRealDest == 0) TheRealDest = ThisDef;
+
+    // Remove PHI node entries for dead edges.  CheckEdge is cleared after the
+    // first edge to TheRealDest is seen, so any *duplicate* edges to it are
+    // also treated as dead and removed.
+    BasicBlock *CheckEdge = TheRealDest;
+    for (succ_iterator SI = succ_begin(TIBB), e = succ_end(TIBB); SI != e; ++SI)
+      if (*SI != CheckEdge)
+        (*SI)->removePredecessor(TIBB);
+      else
+        CheckEdge = 0;
+
+    // Insert the new branch.
+    Instruction *NI = new BranchInst(TheRealDest, TI);
+
+    DOUT << "Threading pred instr: " << *Pred->getTerminator()
+         << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n";
+    // Grab the (possibly now dead) branch condition before nuking TI.
+    Instruction *Cond = 0;
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI))
+      Cond = dyn_cast<Instruction>(BI->getCondition());
+    TI->eraseFromParent();   // Nuke the old one.
+
+    if (Cond) ErasePossiblyDeadInstructionTree(Cond);
+    return true;
+  }
+  return false;
+}
+
+// FoldValueComparisonIntoPredecessors - The specified terminator is a value
+// equality comparison instruction (either a switch or a branch on "X == c").
+// See if any of the predecessors of the terminator block are value comparisons
+// on the same value.  If so, and if safe to do so, fold them together.
+static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
+  BasicBlock *BB = TI->getParent();
+  Value *CV = isValueEqualityComparison(TI);  // CondVal
+  assert(CV && "Not a comparison?");
+  bool Changed = false;
+
+  // Snapshot the predecessor list up front; the folding below rewrites edges.
+  std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
+  while (!Preds.empty()) {
+    BasicBlock *Pred = Preds.back();
+    Preds.pop_back();
+
+    // See if the predecessor is a comparison with the same value.
+    TerminatorInst *PTI = Pred->getTerminator();
+    Value *PCV = isValueEqualityComparison(PTI);  // PredCondVal
+
+    if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+      // Figure out which 'cases' to copy from SI to PSI.
+      std::vector<std::pair<ConstantInt*, BasicBlock*> > BBCases;
+      BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
+
+      std::vector<std::pair<ConstantInt*, BasicBlock*> > PredCases;
+      BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
+
+      // Based on whether the default edge from PTI goes to BB or not, fill in
+      // PredCases and PredDefault with the new switch cases we would like to
+      // build.
+      std::vector<BasicBlock*> NewSuccessors;
+
+      if (PredDefault == BB) {
+        // If this is the default destination from PTI, only the edges in TI
+        // that don't occur in PTI, or that branch to BB will be activated.
+        std::set<ConstantInt*> PTIHandled;
+        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+          if (PredCases[i].second != BB)
+            PTIHandled.insert(PredCases[i].first);
+          else {
+            // The default destination is BB, we don't need explicit targets.
+            // Swap-and-pop removes the entry without shifting the tail.
+            std::swap(PredCases[i], PredCases.back());
+            PredCases.pop_back();
+            --i; --e;
+          }
+
+        // Reconstruct the new switch statement we will be building.
+        if (PredDefault != BBDefault) {
+          PredDefault->removePredecessor(Pred);
+          PredDefault = BBDefault;
+          NewSuccessors.push_back(BBDefault);
+        }
+        for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+          if (!PTIHandled.count(BBCases[i].first) &&
+              BBCases[i].second != BBDefault) {
+            PredCases.push_back(BBCases[i]);
+            NewSuccessors.push_back(BBCases[i].second);
+          }
+
+      } else {
+        // If this is not the default destination from PSI, only the edges
+        // in SI that occur in PSI with a destination of BB will be
+        // activated.
+        std::set<ConstantInt*> PTIHandled;
+        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+          if (PredCases[i].second == BB) {
+            PTIHandled.insert(PredCases[i].first);
+            // Swap-and-pop removes the entry without shifting the tail.
+            std::swap(PredCases[i], PredCases.back());
+            PredCases.pop_back();
+            --i; --e;
+          }
+
+        // Okay, now we know which constants were sent to BB from the
+        // predecessor.  Figure out where they will all go now.
+        for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
+          if (PTIHandled.count(BBCases[i].first)) {
+            // If this is one we are capable of getting...
+            PredCases.push_back(BBCases[i]);
+            NewSuccessors.push_back(BBCases[i].second);
+            PTIHandled.erase(BBCases[i].first);// This constant is taken care of
+          }
+
+        // If there are any constants vectored to BB that TI doesn't handle,
+        // they must go to the default destination of TI.
+        for (std::set<ConstantInt*>::iterator I = PTIHandled.begin(),
+               E = PTIHandled.end(); I != E; ++I) {
+          PredCases.push_back(std::make_pair(*I, BBDefault));
+          NewSuccessors.push_back(BBDefault);
+        }
+      }
+
+      // Okay, at this point, we know which new successor Pred will get.  Make
+      // sure we update the number of entries in the PHI nodes for these
+      // successors.
+      for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i)
+        AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
+
+      // Now that the successors are updated, create the new Switch instruction.
+      SwitchInst *NewSI = new SwitchInst(CV, PredDefault, PredCases.size(),PTI);
+      for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
+        NewSI->addCase(PredCases[i].first, PredCases[i].second);
+
+      Instruction *DeadCond = 0;
+      if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+        // If PTI is a branch, remember the condition.
+        DeadCond = dyn_cast<Instruction>(BI->getCondition());
+      Pred->getInstList().erase(PTI);
+
+      // If the condition is dead now, remove the instruction tree.
+      if (DeadCond) ErasePossiblyDeadInstructionTree(DeadCond);
+
+      // Okay, last check.  If BB is still a successor of PSI, then we must
+      // have an infinite loop case.  If so, add an infinitely looping block
+      // to handle the case to preserve the behavior of the code.
+      BasicBlock *InfLoopBlock = 0;
+      for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
+        if (NewSI->getSuccessor(i) == BB) {
+          if (InfLoopBlock == 0) {
+            // Insert it at the end of the loop, because it's either code,
+            // or it won't matter if it's hot. :)
+            InfLoopBlock = new BasicBlock("infloop", BB->getParent());
+            new BranchInst(InfLoopBlock, InfLoopBlock);
+          }
+          NewSI->setSuccessor(i, InfLoopBlock);
+        }
+
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
+/// BB2, hoist any common code in the two blocks up into the branch block.  The
+/// caller of this function guarantees that BI's block dominates BB1 and BB2.
+static bool HoistThenElseCodeToIf(BranchInst *BI) {
+  // This does very trivial matching, with limited scanning, to find identical
+  // instructions in the two blocks.  In particular, we don't want to get into
+  // O(M*N) situations here where M and N are the sizes of BB1 and BB2.  As
+  // such, we currently just scan for obviously identical instructions in an
+  // identical order.
+  BasicBlock *BB1 = BI->getSuccessor(0);  // The true destination.
+  BasicBlock *BB2 = BI->getSuccessor(1);  // The false destination
+
+  // NOTE(review): this assumes both blocks are non-empty; presumably every
+  // well-formed block has at least a terminator, making begin() valid.
+  Instruction *I1 = BB1->begin(), *I2 = BB2->begin();
+  if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || 
+      isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+    return false;
+
+  // If we get here, we can hoist at least one instruction.
+  BasicBlock *BIParent = BI->getParent();
+
+  do {
+    // If we are hoisting the terminator instruction, don't move one (making a
+    // broken BB), instead clone it, and remove BI.
+    if (isa<TerminatorInst>(I1))
+      goto HoistTerminator;
+
+    // For a normal instruction, we just move one to right before the branch,
+    // then replace all uses of the other with the first.  Finally, we remove
+    // the now redundant second instruction.
+    BIParent->getInstList().splice(BI, BB1->getInstList(), I1);
+    if (!I2->use_empty())
+      I2->replaceAllUsesWith(I1);
+    BB2->getInstList().erase(I2);
+
+    // Advance to the new first instruction of each block and keep going while
+    // they remain identical.
+    I1 = BB1->begin();
+    I2 = BB2->begin();
+  } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2));
+
+  return true;
+
+HoistTerminator:
+  // Okay, it is safe to hoist the terminator.  Clone it (rather than moving)
+  // so each block keeps a terminator until BI is erased at the end.
+  Instruction *NT = I1->clone();
+  BIParent->getInstList().insert(BI, NT);
+  if (NT->getType() != Type::VoidTy) {
+    I1->replaceAllUsesWith(NT);
+    I2->replaceAllUsesWith(NT);
+    NT->takeName(I1);
+  }
+
+  // Hoisting one of the terminators from our successor is a great thing.
+  // Unfortunately, the successors of the if/else blocks may have PHI nodes in
+  // them.  If they do, all PHI entries for BB1/BB2 must agree for all PHI
+  // nodes, so we insert select instruction to compute the final result.
+  std::map<std::pair<Value*,Value*>, SelectInst*> InsertedSelects;
+  for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+    PHINode *PN;
+    for (BasicBlock::iterator BBI = SI->begin();
+         (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+      Value *BB1V = PN->getIncomingValueForBlock(BB1);
+      Value *BB2V = PN->getIncomingValueForBlock(BB2);
+      if (BB1V != BB2V) {
+        // These values do not agree.  Insert a select instruction before NT
+        // that determines the right value.  InsertedSelects memoizes one
+        // select per (true-value, false-value) pair.
+        // NOTE(review): this 'SI' shadows the succ_iterator 'SI' above.
+        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
+        if (SI == 0)
+          SI = new SelectInst(BI->getCondition(), BB1V, BB2V,
+                              BB1V->getName()+"."+BB2V->getName(), NT);
+        // Make the PHI node use the select for all incoming values for BB1/BB2
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+          if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2)
+            PN->setIncomingValue(i, SI);
+      }
+    }
+  }
+
+  // Update any PHI nodes in our new successors.
+  for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI)
+    AddPredecessorToBlock(*SI, BIParent, BB1);
+
+  BI->eraseFromParent();
+  return true;
+}
+
+/// BlockIsSimpleEnoughToThreadThrough - Return true if we can thread a branch
+/// across this block.
+static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
+  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+  unsigned NumInsts = 0;
+
+  // Examine every instruction ahead of the terminator.  The block must be
+  // small, and nothing it defines may be used outside of it (or by a PHI).
+  // FIXME: improve this in the future.
+  for (BasicBlock::iterator It = BB->begin(); &*It != BI; ++It, ++NumInsts) {
+    if (NumInsts > 10) return false;  // Don't clone large BB's.
+
+    // Reject any value with a user outside the current block, or a PHI user.
+    for (Value::use_iterator UI = It->use_begin(), UE = It->use_end();
+         UI != UE; ++UI) {
+      Instruction *User = cast<Instruction>(*UI);
+      if (User->getParent() != BB || isa<PHINode>(User))
+        return false;
+    }
+
+    // This instruction looks ok; keep scanning.
+  }
+
+  return true;
+}
+
+/// FoldCondBranchOnPHI - If we have a conditional branch on a PHI node value
+/// that is defined in the same block as the branch and if any PHI entries are
+/// constants, thread edges corresponding to that entry to be branches to their
+/// ultimate destination.
+static bool FoldCondBranchOnPHI(BranchInst *BI) {
+  BasicBlock *BB = BI->getParent();
+  PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
+  // NOTE: we currently cannot transform this case if the PHI node is used
+  // outside of the block.
+  if (!PN || PN->getParent() != BB || !PN->hasOneUse())
+    return false;
+  
+  // Degenerate case of a single entry PHI.
+  if (PN->getNumIncomingValues() == 1) {
+    if (PN->getIncomingValue(0) != PN)
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+    else
+      // A self-referential single-entry PHI has no defined value.
+      PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+    PN->eraseFromParent();
+    return true;    
+  }
+
+  // Now we know that this block has multiple preds and two succs.
+  if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false;
+  
+  // Okay, this is a simple enough basic block.  See if any phi values are
+  // constants.
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    ConstantInt *CB;
+    if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) &&
+        CB->getType() == Type::Int1Ty) {
+      // Okay, we now know that all edges from PredBB should be revectored to
+      // branch to RealDest.
+      BasicBlock *PredBB = PN->getIncomingBlock(i);
+      // A true (1) condition selects successor 0; false selects successor 1.
+      BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
+      
+      if (RealDest == BB) continue;  // Skip self loops.
+      
+      // The dest block might have PHI nodes, other predecessors and other
+      // difficult cases.  Instead of being smart about this, just insert a new
+      // block that jumps to the destination block, effectively splitting
+      // the edge we are about to create.
+      BasicBlock *EdgeBB = new BasicBlock(RealDest->getName()+".critedge",
+                                          RealDest->getParent(), RealDest);
+      new BranchInst(RealDest, EdgeBB);
+      // NOTE(review): this inner 'PN' shadows the condition PHI above; it
+      // walks RealDest's PHI nodes, giving EdgeBB entries that mirror BB's.
+      PHINode *PN;
+      for (BasicBlock::iterator BBI = RealDest->begin();
+           (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+        Value *V = PN->getIncomingValueForBlock(BB);
+        PN->addIncoming(V, EdgeBB);
+      }
+
+      // BB may have instructions that are being threaded over.  Clone these
+      // instructions into EdgeBB.  We know that there will be no uses of the
+      // cloned instructions outside of EdgeBB.
+      BasicBlock::iterator InsertPt = EdgeBB->begin();
+      std::map<Value*, Value*> TranslateMap;  // Track translated values.
+      for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
+        if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+          // PHIs are not cloned; they resolve to the value on the PredBB edge.
+          TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+        } else {
+          // Clone the instruction.
+          Instruction *N = BBI->clone();
+          if (BBI->hasName()) N->setName(BBI->getName()+".c");
+          
+          // Update operands due to translation.
+          for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+            std::map<Value*, Value*>::iterator PI =
+              TranslateMap.find(N->getOperand(i));
+            if (PI != TranslateMap.end())
+              N->setOperand(i, PI->second);
+          }
+          
+          // Check for trivial simplification.
+          if (Constant *C = ConstantFoldInstruction(N)) {
+            TranslateMap[BBI] = C;
+            delete N;   // Constant folded away, don't need actual inst
+          } else {
+            // Insert the new instruction into its new home.
+            EdgeBB->getInstList().insert(InsertPt, N);
+            if (!BBI->use_empty())
+              TranslateMap[BBI] = N;
+          }
+        }
+      }
+
+      // Loop over all of the edges from PredBB to BB, changing them to branch
+      // to EdgeBB instead.
+      TerminatorInst *PredBBTI = PredBB->getTerminator();
+      for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
+        if (PredBBTI->getSuccessor(i) == BB) {
+          BB->removePredecessor(PredBB);
+          PredBBTI->setSuccessor(i, EdgeBB);
+        }
+      
+      // Recurse, simplifying any other constants.  The non-short-circuit '|'
+      // ensures the recursive call always executes; the result is always true
+      // because an edge was threaded above.
+      return FoldCondBranchOnPHI(BI) | true;
+    }
+  }
+
+  return false;
+}
+
+/// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry
+/// PHI node, see if we can eliminate it.  If the block is the merge point of
+/// a simple "if diamond", every PHI in it is converted into a select on the
+/// if's branch condition and the conditional blocks are emptied into the
+/// dominating block.  Returns true if the IR was changed.
+static bool FoldTwoEntryPHINode(PHINode *PN) {
+  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
+  // statement", which has a very simple dominance structure.  Basically, we
+  // are trying to find the condition that is being branched on, which
+  // subsequently causes this merge to happen.  We really want control
+  // dependence information for this check, but simplifycfg can't keep it up
+  // to date, and this catches most of the cases we care about anyway.
+  //
+  BasicBlock *BB = PN->getParent();
+  BasicBlock *IfTrue, *IfFalse;
+  Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
+  if (!IfCond) return false;
+  
+  // Okay, we found that we can merge this two-entry phi node into a select.
+  // Doing so would require us to fold *all* two entry phi nodes in this block.
+  // At some point this becomes non-profitable (particularly if the target
+  // doesn't support cmov's).  Only do this transformation if there are two or
+  // fewer PHI nodes in this block.
+  // NOTE(review): as written, the bail-out fires on the *fourth* PHI node
+  // (NumPhis is incremented after the test), so blocks with up to three PHIs
+  // are accepted -- confirm whether the comment or the code is the intent.
+  unsigned NumPhis = 0;
+  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
+    if (NumPhis > 2)
+      return false;
+  
+  DOUT << "FOUND IF CONDITION!  " << *IfCond << "  T: "
+       << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n";
+  
+  // Loop over the PHI's seeing if we can promote them all to select
+  // instructions.  While we are at it, keep track of the instructions
+  // that need to be moved to the dominating block.
+  std::set<Instruction*> AggressiveInsts;
+  
+  // After this loop AfterPHIIt points at the first non-PHI instruction,
+  // which is where the select instructions are inserted below.  (The inner
+  // PN intentionally shadows the function parameter.)
+  BasicBlock::iterator AfterPHIIt = BB->begin();
+  while (isa<PHINode>(AfterPHIIt)) {
+    PHINode *PN = cast<PHINode>(AfterPHIIt++);
+    if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) {
+      // Degenerate PHI: both edges carry the same value.  Replace its uses
+      // now; the PHI itself is erased by the select-building loop below.
+      if (PN->getIncomingValue(0) != PN)
+        PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      else
+        // Self-referential PHI: no meaningful value, use undef.
+        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+    } else if (!DominatesMergePoint(PN->getIncomingValue(0), BB,
+                                    &AggressiveInsts) ||
+               !DominatesMergePoint(PN->getIncomingValue(1), BB,
+                                    &AggressiveInsts)) {
+      // An incoming value cannot be made unconditionally available at the
+      // merge point, so the transformation is impossible.
+      return false;
+    }
+  }
+  
+  // If all PHI nodes are promotable, check to make sure that all
+  // instructions in the predecessor blocks can be promoted as well.  If
+  // not, we won't be able to get rid of the control flow, so it's not
+  // worth promoting to select instructions.
+  BasicBlock *DomBlock = 0, *IfBlock1 = 0, *IfBlock2 = 0;
+  PN = cast<PHINode>(BB->begin());   // Reuse the parameter: first PHI in BB.
+  BasicBlock *Pred = PN->getIncomingBlock(0);
+  if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
+    // Pred is a conditional-side block of the diamond; its predecessor is
+    // taken to be the dominating block (presumably guaranteed unique by
+    // GetIfCondition's structure check -- confirm).
+    IfBlock1 = Pred;
+    DomBlock = *pred_begin(Pred);
+    for (BasicBlock::iterator I = Pred->begin();
+         !isa<TerminatorInst>(I); ++I)
+      if (!AggressiveInsts.count(I)) {
+        // This is not an aggressive instruction that we can promote.
+        // Because of this, we won't be able to get rid of the control
+        // flow, so the xform is not worth it.
+        return false;
+      }
+  }
+    
+  // Same check for the second incoming block.
+  Pred = PN->getIncomingBlock(1);
+  if (cast<BranchInst>(Pred->getTerminator())->isUnconditional()) {
+    IfBlock2 = Pred;
+    DomBlock = *pred_begin(Pred);
+    for (BasicBlock::iterator I = Pred->begin();
+         !isa<TerminatorInst>(I); ++I)
+      if (!AggressiveInsts.count(I)) {
+        // This is not an aggressive instruction that we can promote.
+        // Because of this, we won't be able to get rid of the control
+        // flow, so the xform is not worth it.
+        return false;
+      }
+  }
+      
+  // If we can still promote the PHI nodes after this gauntlet of tests,
+  // do all of the PHI's now.
+
+  // Move all 'aggressive' instructions, which are defined in the
+  // conditional parts of the if's up to the dominating block.  The splice
+  // moves everything up to (but not including) the terminator.
+  if (IfBlock1) {
+    DomBlock->getInstList().splice(DomBlock->getTerminator(),
+                                   IfBlock1->getInstList(),
+                                   IfBlock1->begin(),
+                                   IfBlock1->getTerminator());
+  }
+  if (IfBlock2) {
+    DomBlock->getInstList().splice(DomBlock->getTerminator(),
+                                   IfBlock2->getInstList(),
+                                   IfBlock2->begin(),
+                                   IfBlock2->getTerminator());
+  }
+  
+  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
+    // Change the PHI node into a select instruction.  The boolean-index
+    // trick selects the value arriving from the true side as the select's
+    // true operand regardless of the order of the PHI's incoming blocks.
+    Value *TrueVal =
+      PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
+    Value *FalseVal =
+      PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
+    
+    Value *NV = new SelectInst(IfCond, TrueVal, FalseVal, "", AfterPHIIt);
+    PN->replaceAllUsesWith(NV);
+    NV->takeName(PN);
+    
+    BB->getInstList().erase(PN);
+  }
+  return true;
+}
+
+namespace {
+  /// ConstantIntOrdering - A strict weak ordering for ConstantInt pointers
+  /// keyed on the integers' unsigned values instead of their addresses.
+  /// Ordering by pointer would make results depend on allocation order;
+  /// clients that sort ConstantInt's for uniqueness need this stability.
+  struct ConstantIntOrdering {
+    bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
+      // LHS < RHS exactly when RHS's value is unsigned-greater than LHS's.
+      return RHS->getValue().ugt(LHS->getValue());
+    }
+  };
+}
+
+// SimplifyCFG - This function is used to do simplification of a CFG.  For
+// example, it adjusts branches to branches to eliminate the extra hop, it
+// eliminates unreachable basic blocks, and does other "peephole" optimization
+// of the CFG.  It returns true if a modification was made.
+//
+// WARNING:  The entry node of a function may not be simplified.
+//
+bool llvm::SimplifyCFG(BasicBlock *BB) {
+  bool Changed = false;
+  Function *M = BB->getParent();
+
+  assert(BB && BB->getParent() && "Block not embedded in function!");
+  assert(BB->getTerminator() && "Degenerate basic block encountered!");
+  assert(&BB->getParent()->getEntryBlock() != BB &&
+         "Can't Simplify entry block!");
+
+  // Remove basic blocks that have no predecessors... which are unreachable.
+  if (pred_begin(BB) == pred_end(BB) ||
+      *pred_begin(BB) == BB && ++pred_begin(BB) == pred_end(BB)) {
+    DOUT << "Removing BB: \n" << *BB;
+
+    // Loop through all of our successors and make sure they know that one
+    // of their predecessors is going away.
+    for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
+      SI->removePredecessor(BB);
+
+    while (!BB->empty()) {
+      Instruction &I = BB->back();
+      // If this instruction is used, replace uses with an arbitrary
+      // value.  Because control flow can't get here, we don't care
+      // what we replace the value with.  Note that since this block is
+      // unreachable, and all values contained within it must dominate their
+      // uses, that all uses will eventually be removed.
+      if (!I.use_empty())
+        // Make all users of this instruction use undef instead
+        I.replaceAllUsesWith(UndefValue::get(I.getType()));
+
+      // Remove the instruction from the basic block
+      BB->getInstList().pop_back();
+    }
+    M->getBasicBlockList().erase(BB);
+    return true;
+  }
+
+  // Check to see if we can constant propagate this terminator instruction
+  // away...
+  Changed |= ConstantFoldTerminator(BB);
+
+  // If this is a returning block with only PHI nodes in it, fold the return
+  // instruction into any unconditional branch predecessors.
+  //
+  // If any predecessor is a conditional branch that just selects among
+  // different return values, fold the replace the branch/return with a select
+  // and return.
+  if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+    BasicBlock::iterator BBI = BB->getTerminator();
+    if (BBI == BB->begin() || isa<PHINode>(--BBI)) {
+      // Find predecessors that end with branches.
+      std::vector<BasicBlock*> UncondBranchPreds;
+      std::vector<BranchInst*> CondBranchPreds;
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+        TerminatorInst *PTI = (*PI)->getTerminator();
+        if (BranchInst *BI = dyn_cast<BranchInst>(PTI))
+          if (BI->isUnconditional())
+            UncondBranchPreds.push_back(*PI);
+          else
+            CondBranchPreds.push_back(BI);
+      }
+
+      // If we found some, do the transformation!
+      if (!UncondBranchPreds.empty()) {
+        while (!UncondBranchPreds.empty()) {
+          BasicBlock *Pred = UncondBranchPreds.back();
+          DOUT << "FOLDING: " << *BB
+               << "INTO UNCOND BRANCH PRED: " << *Pred;
+          UncondBranchPreds.pop_back();
+          Instruction *UncondBranch = Pred->getTerminator();
+          // Clone the return and add it to the end of the predecessor.
+          Instruction *NewRet = RI->clone();
+          Pred->getInstList().push_back(NewRet);
+
+          // If the return instruction returns a value, and if the value was a
+          // PHI node in "BB", propagate the right value into the return.
+          if (NewRet->getNumOperands() == 1)
+            if (PHINode *PN = dyn_cast<PHINode>(NewRet->getOperand(0)))
+              if (PN->getParent() == BB)
+                NewRet->setOperand(0, PN->getIncomingValueForBlock(Pred));
+          // Update any PHI nodes in the returning block to realize that we no
+          // longer branch to them.
+          BB->removePredecessor(Pred);
+          Pred->getInstList().erase(UncondBranch);
+        }
+
+        // If we eliminated all predecessors of the block, delete the block now.
+        if (pred_begin(BB) == pred_end(BB))
+          // We know there are no successors, so just nuke the block.
+          M->getBasicBlockList().erase(BB);
+
+        return true;
+      }
+
+      // Check out all of the conditional branches going to this return
+      // instruction.  If any of them just select between returns, change the
+      // branch itself into a select/return pair.
+      while (!CondBranchPreds.empty()) {
+        BranchInst *BI = CondBranchPreds.back();
+        CondBranchPreds.pop_back();
+        BasicBlock *TrueSucc = BI->getSuccessor(0);
+        BasicBlock *FalseSucc = BI->getSuccessor(1);
+        BasicBlock *OtherSucc = TrueSucc == BB ? FalseSucc : TrueSucc;
+
+        // Check to see if the non-BB successor is also a return block.
+        if (isa<ReturnInst>(OtherSucc->getTerminator())) {
+          // Check to see if there are only PHI instructions in this block.
+          BasicBlock::iterator OSI = OtherSucc->getTerminator();
+          if (OSI == OtherSucc->begin() || isa<PHINode>(--OSI)) {
+            // Okay, we found a branch that is going to two return nodes.  If
+            // there is no return value for this function, just change the
+            // branch into a return.
+            if (RI->getNumOperands() == 0) {
+              TrueSucc->removePredecessor(BI->getParent());
+              FalseSucc->removePredecessor(BI->getParent());
+              new ReturnInst(0, BI);
+              BI->getParent()->getInstList().erase(BI);
+              return true;
+            }
+
+            // Otherwise, figure out what the true and false return values are
+            // so we can insert a new select instruction.
+            Value *TrueValue = TrueSucc->getTerminator()->getOperand(0);
+            Value *FalseValue = FalseSucc->getTerminator()->getOperand(0);
+
+            // Unwrap any PHI nodes in the return blocks.
+            if (PHINode *TVPN = dyn_cast<PHINode>(TrueValue))
+              if (TVPN->getParent() == TrueSucc)
+                TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
+            if (PHINode *FVPN = dyn_cast<PHINode>(FalseValue))
+              if (FVPN->getParent() == FalseSucc)
+                FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
+
+            // In order for this transformation to be safe, we must be able to
+            // unconditionally execute both operands to the return.  This is
+            // normally the case, but we could have a potentially-trapping
+            // constant expression that prevents this transformation from being
+            // safe.
+            if ((!isa<ConstantExpr>(TrueValue) ||
+                 !cast<ConstantExpr>(TrueValue)->canTrap()) &&
+                (!isa<ConstantExpr>(TrueValue) ||
+                 !cast<ConstantExpr>(TrueValue)->canTrap())) {
+              TrueSucc->removePredecessor(BI->getParent());
+              FalseSucc->removePredecessor(BI->getParent());
+
+              // Insert a new select instruction.
+              Value *NewRetVal;
+              Value *BrCond = BI->getCondition();
+              if (TrueValue != FalseValue)
+                NewRetVal = new SelectInst(BrCond, TrueValue,
+                                           FalseValue, "retval", BI);
+              else
+                NewRetVal = TrueValue;
+              
+              DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
+                   << "\n  " << *BI << "Select = " << *NewRetVal
+                   << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc;
+
+              new ReturnInst(NewRetVal, BI);
+              BI->eraseFromParent();
+              if (Instruction *BrCondI = dyn_cast<Instruction>(BrCond))
+                if (isInstructionTriviallyDead(BrCondI))
+                  BrCondI->eraseFromParent();
+              return true;
+            }
+          }
+        }
+      }
+    }
+  } else if (isa<UnwindInst>(BB->begin())) {
+    // Check to see if the first instruction in this block is just an unwind.
+    // If so, replace any invoke instructions which use this as an exception
+    // destination with call instructions, and any unconditional branch
+    // predecessor with an unwind.
+    //
+    std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
+    while (!Preds.empty()) {
+      BasicBlock *Pred = Preds.back();
+      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) {
+        if (BI->isUnconditional()) {
+          Pred->getInstList().pop_back();  // nuke uncond branch
+          new UnwindInst(Pred);            // Use unwind.
+          Changed = true;
+        }
+      } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))
+        if (II->getUnwindDest() == BB) {
+          // Insert a new branch instruction before the invoke, because this
+          // is now a fall through...
+          BranchInst *BI = new BranchInst(II->getNormalDest(), II);
+          Pred->getInstList().remove(II);   // Take out of symbol table
+
+          // Insert the call now...
+          SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());
+          CallInst *CI = new CallInst(II->getCalledValue(),
+                                      &Args[0], Args.size(), II->getName(), BI);
+          CI->setCallingConv(II->getCallingConv());
+          // If the invoke produced a value, the Call now does instead
+          II->replaceAllUsesWith(CI);
+          delete II;
+          Changed = true;
+        }
+
+      Preds.pop_back();
+    }
+
+    // If this block is now dead, remove it.
+    if (pred_begin(BB) == pred_end(BB)) {
+      // We know there are no successors, so just nuke the block.
+      M->getBasicBlockList().erase(BB);
+      return true;
+    }
+
+  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+    if (isValueEqualityComparison(SI)) {
+      // If we only have one predecessor, and if it is a branch on this value,
+      // see if that predecessor totally determines the outcome of this switch.
+      if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+        if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred))
+          return SimplifyCFG(BB) || 1;
+
+      // If the block only contains the switch, see if we can fold the block
+      // away into any preds.
+      if (SI == &BB->front())
+        if (FoldValueComparisonIntoPredecessors(SI))
+          return SimplifyCFG(BB) || 1;
+    }
+  } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+    if (BI->isUnconditional()) {
+      BasicBlock::iterator BBI = BB->begin();  // Skip over phi nodes...
+      while (isa<PHINode>(*BBI)) ++BBI;
+
+      BasicBlock *Succ = BI->getSuccessor(0);
+      if (BBI->isTerminator() &&  // Terminator is the only non-phi instruction!
+          Succ != BB)             // Don't hurt infinite loops!
+        if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ))
+          return 1;
+      
+    } else {  // Conditional branch
+      if (isValueEqualityComparison(BI)) {
+        // If we only have one predecessor, and if it is a branch on this value,
+        // see if that predecessor totally determines the outcome of this
+        // switch.
+        if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
+          if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
+            return SimplifyCFG(BB) || 1;
+
+        // This block must be empty, except for the setcond inst, if it exists.
+        BasicBlock::iterator I = BB->begin();
+        if (&*I == BI ||
+            (&*I == cast<Instruction>(BI->getCondition()) &&
+             &*++I == BI))
+          if (FoldValueComparisonIntoPredecessors(BI))
+            return SimplifyCFG(BB) | true;
+      }
+      
+      // If this is a branch on a phi node in the current block, thread control
+      // through this block if any PHI node entries are constants.
+      if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
+        if (PN->getParent() == BI->getParent())
+          if (FoldCondBranchOnPHI(BI))
+            return SimplifyCFG(BB) | true;
+
+      // If this basic block is ONLY a setcc and a branch, and if a predecessor
+      // branches to us and one of our successors, fold the setcc into the
+      // predecessor and use logical operations to pick the right destination.
+      BasicBlock *TrueDest  = BI->getSuccessor(0);
+      BasicBlock *FalseDest = BI->getSuccessor(1);
+      if (Instruction *Cond = dyn_cast<Instruction>(BI->getCondition())) {
+        BasicBlock::iterator CondIt = Cond;
+        if ((isa<CmpInst>(Cond) || isa<BinaryOperator>(Cond)) &&
+            Cond->getParent() == BB && &BB->front() == Cond &&
+            &*++CondIt == BI && Cond->hasOneUse() &&
+            TrueDest != BB && FalseDest != BB)
+          for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI!=E; ++PI)
+            if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+              if (PBI->isConditional() && SafeToMergeTerminators(BI, PBI)) {
+                BasicBlock *PredBlock = *PI;
+                if (PBI->getSuccessor(0) == FalseDest ||
+                    PBI->getSuccessor(1) == TrueDest) {
+                  // Invert the predecessors condition test (xor it with true),
+                  // which allows us to write this code once.
+                  Value *NewCond =
+                    BinaryOperator::createNot(PBI->getCondition(),
+                                    PBI->getCondition()->getName()+".not", PBI);
+                  PBI->setCondition(NewCond);
+                  BasicBlock *OldTrue = PBI->getSuccessor(0);
+                  BasicBlock *OldFalse = PBI->getSuccessor(1);
+                  PBI->setSuccessor(0, OldFalse);
+                  PBI->setSuccessor(1, OldTrue);
+                }
+
+                if ((PBI->getSuccessor(0) == TrueDest && FalseDest != BB) ||
+                    (PBI->getSuccessor(1) == FalseDest && TrueDest != BB)) {
+                  // Clone Cond into the predecessor basic block, and or/and the
+                  // two conditions together.
+                  Instruction *New = Cond->clone();
+                  PredBlock->getInstList().insert(PBI, New);
+                  New->takeName(Cond);
+                  Cond->setName(New->getName()+".old");
+                  Instruction::BinaryOps Opcode =
+                    PBI->getSuccessor(0) == TrueDest ?
+                    Instruction::Or : Instruction::And;
+                  Value *NewCond =
+                    BinaryOperator::create(Opcode, PBI->getCondition(),
+                                           New, "bothcond", PBI);
+                  PBI->setCondition(NewCond);
+                  if (PBI->getSuccessor(0) == BB) {
+                    AddPredecessorToBlock(TrueDest, PredBlock, BB);
+                    PBI->setSuccessor(0, TrueDest);
+                  }
+                  if (PBI->getSuccessor(1) == BB) {
+                    AddPredecessorToBlock(FalseDest, PredBlock, BB);
+                    PBI->setSuccessor(1, FalseDest);
+                  }
+                  return SimplifyCFG(BB) | 1;
+                }
+              }
+      }
+
+      // Scan predessor blocks for conditional branches.
+      for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+        if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+          if (PBI != BI && PBI->isConditional()) {
+              
+            // If this block ends with a branch instruction, and if there is a
+            // predecessor that ends on a branch of the same condition, make 
+            // this conditional branch redundant.
+            if (PBI->getCondition() == BI->getCondition() &&
+                PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+              // Okay, the outcome of this conditional branch is statically
+              // knowable.  If this block had a single pred, handle specially.
+              if (BB->getSinglePredecessor()) {
+                // Turn this into a branch on constant.
+                bool CondIsTrue = PBI->getSuccessor(0) == BB;
+                BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue));
+                return SimplifyCFG(BB);  // Nuke the branch on constant.
+              }
+              
+              // Otherwise, if there are multiple predecessors, insert a PHI 
+              // that merges in the constant and simplify the block result.
+              if (BlockIsSimpleEnoughToThreadThrough(BB)) {
+                PHINode *NewPN = new PHINode(Type::Int1Ty,
+                                            BI->getCondition()->getName()+".pr",
+                                            BB->begin());
+                for (PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+                  if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) &&
+                      PBI != BI && PBI->isConditional() &&
+                      PBI->getCondition() == BI->getCondition() &&
+                      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+                    bool CondIsTrue = PBI->getSuccessor(0) == BB;
+                    NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, 
+                                                        CondIsTrue), *PI);
+                  } else {
+                    NewPN->addIncoming(BI->getCondition(), *PI);
+                  }
+                
+                BI->setCondition(NewPN);
+                // This will thread the branch.
+                return SimplifyCFG(BB) | true;
+              }
+            }
+            
+            // If this is a conditional branch in an empty block, and if any
+            // predecessors is a conditional branch to one of our destinations,
+            // fold the conditions into logical ops and one cond br.
+            if (&BB->front() == BI) {
+              int PBIOp, BIOp;
+              if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
+                PBIOp = BIOp = 0;
+              } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
+                PBIOp = 0; BIOp = 1;
+              } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
+                PBIOp = 1; BIOp = 0;
+              } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
+                PBIOp = BIOp = 1;
+              } else {
+                PBIOp = BIOp = -1;
+              }
+              
+              // Check to make sure that the other destination of this branch
+              // isn't BB itself.  If so, this is an infinite loop that will
+              // keep getting unwound.
+              if (PBIOp != -1 && PBI->getSuccessor(PBIOp) == BB)
+                PBIOp = BIOp = -1;
+              
+              // Do not perform this transformation if it would require 
+              // insertion of a large number of select instructions. For targets
+              // without predication/cmovs, this is a big pessimization.
+              if (PBIOp != -1) {
+                BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+           
+                unsigned NumPhis = 0;
+                for (BasicBlock::iterator II = CommonDest->begin();
+                     isa<PHINode>(II); ++II, ++NumPhis) {
+                  if (NumPhis > 2) {
+                    // Disable this xform.
+                    PBIOp = -1;
+                    break;
+                  }
+                }
+              }
+
+              // Finally, if everything is ok, fold the branches to logical ops.
+              if (PBIOp != -1) {
+                BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+                BasicBlock *OtherDest  = BI->getSuccessor(BIOp ^ 1);
+
+                // If OtherDest *is* BB, then this is a basic block with just
+                // a conditional branch in it, where one edge (OtherDesg) goes
+                // back to the block.  We know that the program doesn't get
+                // stuck in the infinite loop, so the condition must be such
+                // that OtherDest isn't branched through. Forward to CommonDest,
+                // and avoid an infinite loop at optimizer time.
+                if (OtherDest == BB)
+                  OtherDest = CommonDest;
+                
+                DOUT << "FOLDING BRs:" << *PBI->getParent()
+                     << "AND: " << *BI->getParent();
+                                
+                // BI may have other predecessors.  Because of this, we leave
+                // it alone, but modify PBI.
+                
+                // Make sure we get to CommonDest on True&True directions.
+                Value *PBICond = PBI->getCondition();
+                if (PBIOp)
+                  PBICond = BinaryOperator::createNot(PBICond,
+                                                      PBICond->getName()+".not",
+                                                      PBI);
+                Value *BICond = BI->getCondition();
+                if (BIOp)
+                  BICond = BinaryOperator::createNot(BICond,
+                                                     BICond->getName()+".not",
+                                                     PBI);
+                // Merge the conditions.
+                Value *Cond =
+                  BinaryOperator::createOr(PBICond, BICond, "brmerge", PBI);
+                
+                // Modify PBI to branch on the new condition to the new dests.
+                PBI->setCondition(Cond);
+                PBI->setSuccessor(0, CommonDest);
+                PBI->setSuccessor(1, OtherDest);
+
+                // OtherDest may have phi nodes.  If so, add an entry from PBI's
+                // block that are identical to the entries for BI's block.
+                PHINode *PN;
+                for (BasicBlock::iterator II = OtherDest->begin();
+                     (PN = dyn_cast<PHINode>(II)); ++II) {
+                  Value *V = PN->getIncomingValueForBlock(BB);
+                  PN->addIncoming(V, PBI->getParent());
+                }
+                
+                // We know that the CommonDest already had an edge from PBI to
+                // it.  If it has PHIs though, the PHIs may have different
+                // entries for BB and PBI's BB.  If so, insert a select to make
+                // them agree.
+                for (BasicBlock::iterator II = CommonDest->begin();
+                     (PN = dyn_cast<PHINode>(II)); ++II) {
+                  Value * BIV = PN->getIncomingValueForBlock(BB);
+                  unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+                  Value *PBIV = PN->getIncomingValue(PBBIdx);
+                  if (BIV != PBIV) {
+                    // Insert a select in PBI to pick the right value.
+                    Value *NV = new SelectInst(PBICond, PBIV, BIV,
+                                               PBIV->getName()+".mux", PBI);
+                    PN->setIncomingValue(PBBIdx, NV);
+                  }
+                }
+
+                DOUT << "INTO: " << *PBI->getParent();
+
+                // This basic block is probably dead.  We know it has at least
+                // one fewer predecessor.
+                return SimplifyCFG(BB) | true;
+              }
+            }
+          }
+    }
+  } else if (isa<UnreachableInst>(BB->getTerminator())) {
+    // If there are any instructions immediately before the unreachable that can
+    // be removed, do so.
+    Instruction *Unreachable = BB->getTerminator();
+    while (Unreachable != BB->begin()) {
+      BasicBlock::iterator BBI = Unreachable;
+      --BBI;
+      if (isa<CallInst>(BBI)) break;
+      // Delete this instruction
+      BB->getInstList().erase(BBI);
+      Changed = true;
+    }
+
+    // If the unreachable instruction is the first in the block, take a gander
+    // at all of the predecessors of this instruction, and simplify them.
+    if (&BB->front() == Unreachable) {
+      std::vector<BasicBlock*> Preds(pred_begin(BB), pred_end(BB));
+      for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
+        TerminatorInst *TI = Preds[i]->getTerminator();
+
+        if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+          if (BI->isUnconditional()) {
+            if (BI->getSuccessor(0) == BB) {
+              new UnreachableInst(TI);
+              TI->eraseFromParent();
+              Changed = true;
+            }
+          } else {
+            if (BI->getSuccessor(0) == BB) {
+              new BranchInst(BI->getSuccessor(1), BI);
+              BI->eraseFromParent();
+            } else if (BI->getSuccessor(1) == BB) {
+              new BranchInst(BI->getSuccessor(0), BI);
+              BI->eraseFromParent();
+              Changed = true;
+            }
+          }
+        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+          for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+            if (SI->getSuccessor(i) == BB) {
+              BB->removePredecessor(SI->getParent());
+              SI->removeCase(i);
+              --i; --e;
+              Changed = true;
+            }
+          // If the default value is unreachable, figure out the most popular
+          // destination and make it the default.
+          if (SI->getSuccessor(0) == BB) {
+            std::map<BasicBlock*, unsigned> Popularity;
+            for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+              Popularity[SI->getSuccessor(i)]++;
+
+            // Find the most popular block.
+            unsigned MaxPop = 0;
+            BasicBlock *MaxBlock = 0;
+            for (std::map<BasicBlock*, unsigned>::iterator
+                   I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
+              if (I->second > MaxPop) {
+                MaxPop = I->second;
+                MaxBlock = I->first;
+              }
+            }
+            if (MaxBlock) {
+              // Make this the new default, allowing us to delete any explicit
+              // edges to it.
+              SI->setSuccessor(0, MaxBlock);
+              Changed = true;
+
+              // If MaxBlock has phinodes in it, remove MaxPop-1 entries from
+              // it.
+              if (isa<PHINode>(MaxBlock->begin()))
+                for (unsigned i = 0; i != MaxPop-1; ++i)
+                  MaxBlock->removePredecessor(SI->getParent());
+
+              for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
+                if (SI->getSuccessor(i) == MaxBlock) {
+                  SI->removeCase(i);
+                  --i; --e;
+                }
+            }
+          }
+        } else if (InvokeInst *II = dyn_cast<InvokeInst>(TI)) {
+          if (II->getUnwindDest() == BB) {
+            // Convert the invoke to a call instruction.  This would be a good
+            // place to note that the call does not throw though.
+            BranchInst *BI = new BranchInst(II->getNormalDest(), II);
+            II->removeFromParent();   // Take out of symbol table
+
+            // Insert the call now...
+            SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+            CallInst *CI = new CallInst(II->getCalledValue(),
+                                        &Args[0], Args.size(),
+                                        II->getName(), BI);
+            CI->setCallingConv(II->getCallingConv());
+            // If the invoke produced a value, the Call does now instead.
+            II->replaceAllUsesWith(CI);
+            delete II;
+            Changed = true;
+          }
+        }
+      }
+
+      // If this block is now dead, remove it.
+      if (pred_begin(BB) == pred_end(BB)) {
+        // We know there are no successors, so just nuke the block.
+        M->getBasicBlockList().erase(BB);
+        return true;
+      }
+    }
+  }
+
+  // Merge basic blocks into their predecessor if there is only one distinct
+  // pred, and if there is only one distinct successor of the predecessor, and
+  // if there are no PHI nodes.
+  //
+  pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
+  BasicBlock *OnlyPred = *PI++;
+  for (; PI != PE; ++PI)  // Search all predecessors, see if they are all same
+    if (*PI != OnlyPred) {
+      OnlyPred = 0;       // There are multiple different predecessors...
+      break;
+    }
+
+  BasicBlock *OnlySucc = 0;
+  if (OnlyPred && OnlyPred != BB &&    // Don't break self loops
+      OnlyPred->getTerminator()->getOpcode() != Instruction::Invoke) {
+    // Check to see if there is only one distinct successor...
+    succ_iterator SI(succ_begin(OnlyPred)), SE(succ_end(OnlyPred));
+    OnlySucc = BB;
+    for (; SI != SE; ++SI)
+      if (*SI != OnlySucc) {
+        OnlySucc = 0;     // There are multiple distinct successors!
+        break;
+      }
+  }
+
+  if (OnlySucc) {
+    DOUT << "Merging: " << *BB << "into: " << *OnlyPred;
+
+    // Resolve any PHI nodes at the start of the block.  They are all
+    // guaranteed to have exactly one entry if they exist, unless there are
+    // multiple duplicate (but guaranteed to be equal) entries for the
+    // incoming edges.  This occurs when there are multiple edges from
+    // OnlyPred to OnlySucc.
+    //
+    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      BB->getInstList().pop_front();  // Delete the phi node.
+    }
+
+    // Delete the unconditional branch from the predecessor.
+    OnlyPred->getInstList().pop_back();
+
+    // Move all definitions in the successor to the predecessor.
+    OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
+    // Make all PHI nodes that referred to BB now refer to Pred as their
+    // source.
+    BB->replaceAllUsesWith(OnlyPred);
+
+    // Inherit predecessors name if it exists.
+    if (!OnlyPred->hasName())
+      OnlyPred->takeName(BB);
+    
+    // Erase basic block from the function.
+    M->getBasicBlockList().erase(BB);
+
+    return true;
+  }
+
+  // Otherwise, if this block only has a single predecessor, and if that block
+  // is a conditional branch, see if we can hoist any code from this block up
+  // into our predecessor.
+  if (OnlyPred)
+    if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
+      if (BI->isConditional()) {
+        // Get the other block.
+        BasicBlock *OtherBB = BI->getSuccessor(BI->getSuccessor(0) == BB);
+        PI = pred_begin(OtherBB);
+        ++PI;
+        if (PI == pred_end(OtherBB)) {
+          // We have a conditional branch to two blocks that are only reachable
+          // from the condbr.  We know that the condbr dominates the two blocks,
+          // so see if there is any identical code in the "then" and "else"
+          // blocks.  If so, we can hoist it up to the branching block.
+          Changed |= HoistThenElseCodeToIf(BI);
+        }
+      }
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    if (BranchInst *BI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+      // Change br (X == 0 | X == 1), T, F into a switch instruction.
+      if (BI->isConditional() && isa<Instruction>(BI->getCondition())) {
+        Instruction *Cond = cast<Instruction>(BI->getCondition());
+        // If this is a bunch of seteq's or'd together, or if it's a bunch of
+        // 'setne's and'ed together, collect them.
+        Value *CompVal = 0;
+        std::vector<ConstantInt*> Values;
+        bool TrueWhenEqual = GatherValueComparisons(Cond, CompVal, Values);
+        if (CompVal && CompVal->getType()->isInteger()) {
+          // There might be duplicate constants in the list, which the switch
+          // instruction can't handle, remove them now.
+          std::sort(Values.begin(), Values.end(), ConstantIntOrdering());
+          Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
+
+          // Figure out which block is which destination.
+          BasicBlock *DefaultBB = BI->getSuccessor(1);
+          BasicBlock *EdgeBB    = BI->getSuccessor(0);
+          if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
+
+          // Create the new switch instruction now.
+          SwitchInst *New = new SwitchInst(CompVal, DefaultBB,Values.size(),BI);
+
+          // Add all of the 'cases' to the switch instruction.
+          for (unsigned i = 0, e = Values.size(); i != e; ++i)
+            New->addCase(Values[i], EdgeBB);
+
+          // We added edges from PI to the EdgeBB.  As such, if there were any
+          // PHI nodes in EdgeBB, they need entries to be added corresponding to
+          // the number of edges added.
+          for (BasicBlock::iterator BBI = EdgeBB->begin();
+               isa<PHINode>(BBI); ++BBI) {
+            PHINode *PN = cast<PHINode>(BBI);
+            Value *InVal = PN->getIncomingValueForBlock(*PI);
+            for (unsigned i = 0, e = Values.size()-1; i != e; ++i)
+              PN->addIncoming(InVal, *PI);
+          }
+
+          // Erase the old branch instruction.
+          (*PI)->getInstList().erase(BI);
+
+          // Erase the potentially condition tree that was used to computed the
+          // branch condition.
+          ErasePossiblyDeadInstructionTree(Cond);
+          return true;
+        }
+      }
+
+  // If there is a trivial two-entry PHI node in this basic block, and we can
+  // eliminate it, do so now.
+  if (PHINode *PN = dyn_cast<PHINode>(BB->begin()))
+    if (PN->getNumIncomingValues() == 2)
+      Changed |= FoldTwoEntryPHINode(PN); 
+
+  return Changed;
+}
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
new file mode 100644
index 0000000..b545ad3
--- /dev/null
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -0,0 +1,138 @@
+//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return
+// instruction in them.  Additionally, it keeps track of which node is the new
+// exit node of the CFG.  If there are no exit nodes in the CFG, the getExitNode
+// method will return a null pointer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Type.h"
+using namespace llvm;
+
+// Pass identification member; its address identifies this pass.
+char UnifyFunctionExitNodes::ID = 0;
+// Register the pass so it is available as -mergereturn on the command line.
+static RegisterPass<UnifyFunctionExitNodes>
+X("mergereturn", "Unify function exit nodes");
+
+// Out-of-line definition of the class's stub member; presumably only an
+// address anchor for the pass -- TODO confirm against the header.
+int UnifyFunctionExitNodes::stub;
+
+/// createUnifyFunctionExitNodesPass - Factory returning a freshly allocated
+/// instance of this pass; the caller (the pass manager) takes ownership.
+Pass *llvm::createUnifyFunctionExitNodesPass() {
+  return new UnifyFunctionExitNodes();
+}
+
+/// getAnalysisUsage - Declare which transform properties this pass preserves,
+/// so the pass manager does not needlessly invalidate and re-run them.
+void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+  // We preserve the non-critical-edgeness property: the only branches this
+  // pass inserts are unconditional (see runOnFunction), so no critical edges
+  // are introduced.
+  AU.addPreservedID(BreakCriticalEdgesID);
+  // This is a cluster of orthogonal Transforms whose lowered/promoted form is
+  // not disturbed by merging exit blocks.
+  AU.addPreservedID(PromoteMemoryToRegisterID);
+  AU.addPreservedID(LowerSelectID);
+  AU.addPreservedID(LowerSwitchID);
+}
+
+// runOnFunction - Unify all exit nodes of the CFG by creating new BasicBlocks
+// as needed and converting all return / unwind / unreachable terminators into
+// unconditional branches to the unified block of the matching kind.  The
+// resulting singular blocks (or null when the function has no terminator of
+// that kind) are cached in the ReturnBlock, UnwindBlock and UnreachableBlock
+// members.
+//
+// Returns true iff the function was modified.
+//
+bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
+  // Loop over all of the blocks in a function, tracking all of the blocks
+  // that return, unwind, or are unreachable.
+  //
+  std::vector<BasicBlock*> ReturningBlocks;
+  std::vector<BasicBlock*> UnwindingBlocks;
+  std::vector<BasicBlock*> UnreachableBlocks;
+  for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    if (isa<ReturnInst>(I->getTerminator()))
+      ReturningBlocks.push_back(I);
+    else if (isa<UnwindInst>(I->getTerminator()))
+      UnwindingBlocks.push_back(I);
+    else if (isa<UnreachableInst>(I->getTerminator()))
+      UnreachableBlocks.push_back(I);
+
+  // Track modification explicitly: the early returns in the return-block
+  // handling below must not report "unchanged" after we have already rewritten
+  // multiple unwind or unreachable blocks.
+  bool Changed = false;
+
+  // Handle unwinding blocks first.
+  if (UnwindingBlocks.empty()) {
+    UnwindBlock = 0;
+  } else if (UnwindingBlocks.size() == 1) {
+    UnwindBlock = UnwindingBlocks.front();
+  } else {
+    // Multiple unwinding blocks: funnel them all into one new block that
+    // performs the unwind.
+    UnwindBlock = new BasicBlock("UnifiedUnwindBlock", &F);
+    new UnwindInst(UnwindBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
+           E = UnwindingBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unwind insn
+      new BranchInst(UnwindBlock, BB);
+    }
+    Changed = true;
+  }
+
+  // Then unreachable blocks.
+  if (UnreachableBlocks.empty()) {
+    UnreachableBlock = 0;
+  } else if (UnreachableBlocks.size() == 1) {
+    UnreachableBlock = UnreachableBlocks.front();
+  } else {
+    // Multiple "unreachable" blocks: merge them into a single one.
+    UnreachableBlock = new BasicBlock("UnifiedUnreachableBlock", &F);
+    new UnreachableInst(UnreachableBlock);
+
+    for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
+           E = UnreachableBlocks.end(); I != E; ++I) {
+      BasicBlock *BB = *I;
+      BB->getInstList().pop_back();  // Remove the unreachable inst.
+      new BranchInst(UnreachableBlock, BB);
+    }
+    Changed = true;
+  }
+
+  // Now handle return blocks.
+  if (ReturningBlocks.empty()) {
+    ReturnBlock = 0;
+    return Changed;                        // No blocks return
+  } else if (ReturningBlocks.size() == 1) {
+    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
+    return Changed;
+  }
+
+  // Otherwise, we need to insert a new basic block into the function, add a PHI
+  // node (if the function returns a value), and convert all of the return
+  // instructions into unconditional branches.
+  //
+  BasicBlock *NewRetBlock = new BasicBlock("UnifiedReturnBlock", &F);
+
+  PHINode *PN = 0;
+  if (F.getReturnType() != Type::VoidTy) {
+    // If the function doesn't return void... add a PHI node to the block...
+    PN = new PHINode(F.getReturnType(), "UnifiedRetVal");
+    NewRetBlock->getInstList().push_back(PN);
+  }
+  new ReturnInst(PN, NewRetBlock);
+
+  // Loop over all of the blocks, replacing the return instruction with an
+  // unconditional branch.  Each return's operand (if any) feeds the PHI node
+  // in the unified block.
+  //
+  for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
+         E = ReturningBlocks.end(); I != E; ++I) {
+    BasicBlock *BB = *I;
+
+    // Add an incoming element to the PHI node for every return instruction that
+    // is merging into this new block...
+    if (PN) PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+    BB->getInstList().pop_back();  // Remove the return insn
+    new BranchInst(NewRetBlock, BB);
+  }
+  ReturnBlock = NewRetBlock;
+  return true;
+}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
new file mode 100644
index 0000000..0b8c5c2
--- /dev/null
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -0,0 +1,118 @@
+//===- ValueMapper.cpp - Interface shared by lib/Transforms/Utils ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue function, which is shared by various parts of
+// the lib/Transforms/Utils library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueMapper.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+using namespace llvm;
+
+/// MapValue - Return the Value that V should be replaced by under the mapping
+/// in VM, computing and caching the answer if it is not already present.
+/// Global values and inline asm map to themselves; constants are rebuilt if
+/// (and only if) one of their operands maps to something new; any other value
+/// (e.g. an instruction the caller has not seeded into VM) yields null.
+Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {
+  Value *&VMSlot = VM[V];
+  if (VMSlot) return VMSlot;      // Does it exist in the map yet?
+  
+  // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
+  // DenseMap.  This includes any recursive calls to MapValue.
+
+  // Global values do not need to be seeded into the ValueMap if they are using
+  // the identity mapping.
+  if (isa<GlobalValue>(V) || isa<InlineAsm>(V))
+    return VMSlot = const_cast<Value*>(V);
+
+  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
+    // Operand-less constants never need rewriting; cache the identity mapping.
+    // (No recursion has happened yet, so VMSlot is still valid here.)
+    if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
+        isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
+        isa<UndefValue>(C))
+      return VMSlot = C;           // Primitive constants map directly
+    else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+      // Scan for the first element that maps to a different value; only then
+      // pay for building a replacement array.
+      for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
+        Value *MV = MapValue(CA->getOperand(i), VM);
+        if (MV != CA->getOperand(i)) {
+          // This array must contain a reference to a global, make a new array
+          // and return it.
+          //
+          std::vector<Constant*> Values;
+          Values.reserve(CA->getNumOperands());
+          for (unsigned j = 0; j != i; ++j)
+            Values.push_back(CA->getOperand(j));
+          Values.push_back(cast<Constant>(MV));
+          for (++i; i != e; ++i)
+            Values.push_back(cast<Constant>(MapValue(CA->getOperand(i), VM)));
+          // Use VM[V] (not VMSlot): the recursive calls above may have grown
+          // the DenseMap and invalidated the cached reference.
+          return VM[V] = ConstantArray::get(CA->getType(), Values);
+        }
+      }
+      // Every element mapped to itself; the array is its own replacement.
+      return VM[V] = C;
+
+    } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
+      // Same copy-on-first-difference strategy as the array case above.
+      for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+        Value *MV = MapValue(CS->getOperand(i), VM);
+        if (MV != CS->getOperand(i)) {
+          // This struct must contain a reference to a global, make a new struct
+          // and return it.
+          //
+          std::vector<Constant*> Values;
+          Values.reserve(CS->getNumOperands());
+          for (unsigned j = 0; j != i; ++j)
+            Values.push_back(CS->getOperand(j));
+          Values.push_back(cast<Constant>(MV));
+          for (++i; i != e; ++i)
+            Values.push_back(cast<Constant>(MapValue(CS->getOperand(i), VM)));
+          return VM[V] = ConstantStruct::get(CS->getType(), Values);
+        }
+      }
+      return VM[V] = C;
+
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      // Constant expressions are unconditionally rebuilt from their mapped
+      // operands; getWithOperands returns the original if nothing changed.
+      std::vector<Constant*> Ops;
+      for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
+        Ops.push_back(cast<Constant>(MapValue(CE->getOperand(i), VM)));
+      return VM[V] = CE->getWithOperands(Ops);
+    } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+      // Same copy-on-first-difference strategy as the array/struct cases.
+      for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) {
+        Value *MV = MapValue(CP->getOperand(i), VM);
+        if (MV != CP->getOperand(i)) {
+          // This vector value must contain a reference to a global, make a new
+          // vector constant and return it.
+          //
+          std::vector<Constant*> Values;
+          Values.reserve(CP->getNumOperands());
+          for (unsigned j = 0; j != i; ++j)
+            Values.push_back(CP->getOperand(j));
+          Values.push_back(cast<Constant>(MV));
+          for (++i; i != e; ++i)
+            Values.push_back(cast<Constant>(MapValue(CP->getOperand(i), VM)));
+          return VM[V] = ConstantVector::get(Values);
+        }
+      }
+      return VM[V] = C;
+      
+    } else {
+      assert(0 && "Unknown type of constant!");
+    }
+  }
+
+  // Not a constant or global: the caller was expected to seed a mapping for
+  // this value.  Returning null lets RemapInstruction's assert catch unmapped
+  // references.
+  return 0;
+}
+
+/// RemapInstruction - Rewrite every operand of I in place so that it refers
+/// to the value that ValueMap associates with the operand's current value.
+/// Asserts if any operand has no mapping.
+///
+void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {
+  unsigned NumOps = I->getNumOperands();
+  for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
+    Value *Mapped = MapValue(I->getOperand(Idx), ValueMap);
+    assert(Mapped && "Referenced value not in value map!");
+    I->setOperand(Idx, Mapped);
+  }
+}
diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h
new file mode 100644
index 0000000..51319db
--- /dev/null
+++ b/lib/Transforms/Utils/ValueMapper.h
@@ -0,0 +1,29 @@
+//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue interface which is used by various parts of
+// the Transforms/Utils library to implement cloning and linking facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef VALUEMAPPER_H
+#define VALUEMAPPER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+  class Value;
+  class Instruction;
+  // ValueMapTy - Memoization table used while cloning/linking: maps each
+  // original value to its replacement.
+  typedef DenseMap<const Value *, Value *> ValueMapTy;
+
+  // MapValue - Return the value V maps to under VM, seeding the map as
+  // needed; constants and globals are handled specially by the
+  // implementation.  May return null when V has no mapping.
+  Value *MapValue(const Value *V, ValueMapTy &VM);
+  // RemapInstruction - Rewrite I's operands in place through VM; asserts if
+  // an operand is unmapped.
+  void RemapInstruction(Instruction *I, ValueMapTy &VM);
+} // End llvm namespace
+
+#endif