It's not necessary to do rounding for alloca operations when the requested
alignment is equal to the stack alignment.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40004 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
new file mode 100644
index 0000000..3c7efb1
--- /dev/null
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -0,0 +1,650 @@
+//===- RSProfiling.cpp - Various profiling using random sampling ----------===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file was developed by the LLVM research group and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These passes implement a random sampling based profiling.  Different methods
+// of choosing when to sample are supported, as well as different types of
+// profiling.  This is done as two passes.  The first is a sequence of profiling
+// passes which insert profiling into the program, and remember what they 
+// inserted.
+//
+// The second stage duplicates all instructions in a function, ignoring the 
+// profiling code, then connects the two versions togeather at the entry and at
+// backedges.  At each connection point a choice is made as to whether to jump
+// to the profiled code (take a sample) or execute the unprofiled code.
+//
+// It is highly recommeneded that after this pass one runs mem2reg and adce
+// (instcombine load-vn gdce dse also are good to run afterwards)
+//
+// This design is intended to make the profiling passes independent of the RS
+// framework, but any profiling pass that implements the RSProfiling interface
+// is compatible with the rs framework (and thus can be sampled)
+//
+// TODO: obviously the block and function profiling are almost identical to the
+// existing ones, so they can be unified (esp since these passes are valid
+// without the rs framework).
+// TODO: Fix choice code so that frequency is not hard coded
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "RSProfiling.h"
+#include <set>
+#include <map>
+#include <queue>
+#include <list>
+using namespace llvm;
+
+namespace {
+  enum RandomMeth {
+    GBV, GBVO, HOSTCC
+  };
+
+  cl::opt<RandomMeth> RandomMethod("profile-randomness",
+      cl::desc("How to randomly choose to profile:"),
+      cl::values(
+                 clEnumValN(GBV, "global", "global counter"),
+                 clEnumValN(GBVO, "ra_global", 
+                            "register allocated global counter"),
+                 clEnumValN(HOSTCC, "rdcc", "cycle counter"),
+                 clEnumValEnd));
+  
+  /// NullProfilerRS - The basic profiler that does nothing.  It is the default
+  /// profiler and thus terminates RSProfiler chains.  It is useful for 
+  /// measuring framework overhead
+  class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    bool isProfiling(Value* v) {
+      return false;
+    }
+    bool runOnModule(Module &M) {
+      return false;
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+
+  static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
+  static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
+                                         "Measure profiling framework overhead");
+  static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
+
+  /// Chooser - Something that chooses when to make a sample of the profiled code
+  class VISIBILITY_HIDDEN Chooser {
+  public:
+    /// ProcessChoicePoint - is called for each basic block inserted to choose 
+    /// between normal and sample code
+    virtual void ProcessChoicePoint(BasicBlock*) = 0;
+    /// PrepFunction - is called once per function before other work is done.
+    /// This gives the opertunity to insert new allocas and such.
+    virtual void PrepFunction(Function*) = 0;
+    virtual ~Chooser() {}
+  };
+
+  //Things that implement sampling policies
+  //A global value that is read-mod-stored to choose when to sample.
+  //A sample is taken when the global counter hits 0
+  class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
+    GlobalVariable* Counter;
+    Value* ResetValue;
+    const Type* T;
+  public:
+    GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+    virtual ~GlobalRandomCounter();
+    virtual void PrepFunction(Function* F);
+    virtual void ProcessChoicePoint(BasicBlock* bb);
+  };
+
+  //Same is GRC, but allow register allocation of the global counter
+  class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser {
+    GlobalVariable* Counter;
+    Value* ResetValue;
+    AllocaInst* AI;
+    const Type* T;
+  public:
+    GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+    virtual ~GlobalRandomCounterOpt();
+    virtual void PrepFunction(Function* F);
+    virtual void ProcessChoicePoint(BasicBlock* bb);
+  };
+
+  //Use the cycle counter intrinsic as a source of pseudo randomness when
+  //deciding when to sample.
+  class VISIBILITY_HIDDEN CycleCounter : public Chooser {
+    uint64_t rm;
+    Constant *F;
+  public:
+    CycleCounter(Module& m, uint64_t resetmask);
+    virtual ~CycleCounter();
+    virtual void PrepFunction(Function* F);
+    virtual void ProcessChoicePoint(BasicBlock* bb);
+  };
+
+  /// ProfilerRS - Insert the random sampling framework
+  struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass {
+    static char ID; // Pass identification, replacement for typeid
+    ProfilerRS() : FunctionPass((intptr_t)&ID) {}
+
+    std::map<Value*, Value*> TransCache;
+    std::set<BasicBlock*> ChoicePoints;
+    Chooser* c;
+
+    //Translate and duplicate values for the new profile free version of stuff
+    Value* Translate(Value* v);
+    //Duplicate an entire function (with out profiling)
+    void Duplicate(Function& F, RSProfilers& LI);
+    //Called once for each backedge, handle the insertion of choice points and
+    //the interconection of the two versions of the code
+    void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
+    bool runOnFunction(Function& F);
+    bool doInitialization(Module &M);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  };
+
+  RegisterPass<ProfilerRS> X("insert-rs-profiling-framework",
+                             "Insert random sampling instrumentation framework");
+}
+
+char RSProfilers::ID = 0;
+char NullProfilerRS::ID = 0;
+char ProfilerRS::ID = 0;
+
+//Local utilities
+static void ReplacePhiPred(BasicBlock* btarget, 
+                           BasicBlock* bold, BasicBlock* bnew);
+
+static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
+
+template<class T>
+static void recBackEdge(BasicBlock* bb, T& BackEdges, 
+                        std::map<BasicBlock*, int>& color,
+                        std::map<BasicBlock*, int>& depth,
+                        std::map<BasicBlock*, int>& finish,
+                        int& time);
+
+//find the back edges and where they go to
+template<class T>
+static void getBackEdges(Function& F, T& BackEdges);
+
+
+///////////////////////////////////////
+// Methods of choosing when to profile
+///////////////////////////////////////
+  
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t, 
+                                         uint64_t resetval) : T(t) {
+  ConstantInt* Init = ConstantInt::get(T, resetval); 
+  ResetValue = Init;
+  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
+                               Init, "RandomSteeringCounter", &M);
+}
+
+GlobalRandomCounter::~GlobalRandomCounter() {}
+
+void GlobalRandomCounter::PrepFunction(Function* F) {}
+
+void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
+  BranchInst* t = cast<BranchInst>(bb->getTerminator());
+  
+  //decrement counter
+  LoadInst* l = new LoadInst(Counter, "counter", t);
+  
+  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), 
+                             "countercc", t);
+
+  Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
+                                        "counternew", t);
+  new StoreInst(nv, Counter, t);
+  t->setCondition(s);
+  
+  //reset counter
+  BasicBlock* oldnext = t->getSuccessor(0);
+  BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(), 
+                                          oldnext);
+  TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
+  t->setSuccessor(0, resetblock);
+  new StoreInst(ResetValue, Counter, t2);
+  ReplacePhiPred(oldnext, bb, resetblock);
+}
+
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t, 
+                                               uint64_t resetval) 
+  : AI(0), T(t) {
+  ConstantInt* Init = ConstantInt::get(T, resetval);
+  ResetValue  = Init;
+  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
+                               Init, "RandomSteeringCounter", &M);
+}
+
+GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
+
+void GlobalRandomCounterOpt::PrepFunction(Function* F) {
+  //make a local temporary to cache the global
+  BasicBlock& bb = F->getEntryBlock();
+  BasicBlock::iterator InsertPt = bb.begin();
+  AI = new AllocaInst(T, 0, "localcounter", InsertPt);
+  LoadInst* l = new LoadInst(Counter, "counterload", InsertPt);
+  new StoreInst(l, AI, InsertPt);
+  
+  //modify all functions and return values to restore the local variable to/from
+  //the global variable
+  for(Function::iterator fib = F->begin(), fie = F->end();
+      fib != fie; ++fib)
+    for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
+        bib != bie; ++bib)
+      if (isa<CallInst>(bib)) {
+        LoadInst* l = new LoadInst(AI, "counter", bib);
+        new StoreInst(l, Counter, bib);
+        l = new LoadInst(Counter, "counter", ++bib);
+        new StoreInst(l, AI, bib--);
+      } else if (isa<InvokeInst>(bib)) {
+        LoadInst* l = new LoadInst(AI, "counter", bib);
+        new StoreInst(l, Counter, bib);
+        
+        BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest();
+        BasicBlock::iterator i = bb->begin();
+        while (isa<PHINode>(i))
+          ++i;
+        l = new LoadInst(Counter, "counter", i);
+        
+        bb = cast<InvokeInst>(bib)->getUnwindDest();
+        i = bb->begin();
+        while (isa<PHINode>(i)) ++i;
+        l = new LoadInst(Counter, "counter", i);
+        new StoreInst(l, AI, i);
+      } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
+        LoadInst* l = new LoadInst(AI, "counter", bib);
+        new StoreInst(l, Counter, bib);
+      }
+}
+
+void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
+  BranchInst* t = cast<BranchInst>(bb->getTerminator());
+  
+  //decrement counter
+  LoadInst* l = new LoadInst(AI, "counter", t);
+  
+  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), 
+                             "countercc", t);
+
+  Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
+                                        "counternew", t);
+  new StoreInst(nv, AI, t);
+  t->setCondition(s);
+  
+  //reset counter
+  BasicBlock* oldnext = t->getSuccessor(0);
+  BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(), 
+                                          oldnext);
+  TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
+  t->setSuccessor(0, resetblock);
+  new StoreInst(ResetValue, AI, t2);
+  ReplacePhiPred(oldnext, bb, resetblock);
+}
+
+
+CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
+  F = m.getOrInsertFunction("llvm.readcyclecounter", Type::Int64Ty, NULL);
+}
+
+CycleCounter::~CycleCounter() {}
+
+void CycleCounter::PrepFunction(Function* F) {}
+
+void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
+  BranchInst* t = cast<BranchInst>(bb->getTerminator());
+  
+  CallInst* c = new CallInst(F, "rdcc", t);
+  BinaryOperator* b = 
+    BinaryOperator::createAnd(c, ConstantInt::get(Type::Int64Ty, rm),
+                              "mrdcc", t);
+  
+  ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
+                             ConstantInt::get(Type::Int64Ty, 0), 
+                             "mrdccc", t);
+
+  t->setCondition(s);
+}
+
+///////////////////////////////////////
+// Profiling:
+///////////////////////////////////////
+bool RSProfilers_std::isProfiling(Value* v) {
+  if (profcode.find(v) != profcode.end())
+    return true;
+  //else
+  RSProfilers& LI = getAnalysis<RSProfilers>();
+  return LI.isProfiling(v);
+}
+
+void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
+                                          GlobalValue *CounterArray) {
+  // Insert the increment after any alloca or PHI instructions...
+  BasicBlock::iterator InsertPos = BB->begin();
+  while (isa<AllocaInst>(InsertPos) || isa<PHINode>(InsertPos))
+    ++InsertPos;
+  
+  // Create the getelementptr constant expression
+  std::vector<Constant*> Indices(2);
+  Indices[0] = Constant::getNullValue(Type::Int32Ty);
+  Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
+  Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
+                                                        &Indices[0], 2);
+  
+  // Load, increment and store the value back.
+  Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
+  profcode.insert(OldVal);
+  Value *NewVal = BinaryOperator::createAdd(OldVal,
+                                            ConstantInt::get(Type::Int32Ty, 1),
+                                            "NewCounter", InsertPos);
+  profcode.insert(NewVal);
+  profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
+}
+
+void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
+  //grab any outstanding profiler, or get the null one
+  AU.addRequired<RSProfilers>();
+}
+
+///////////////////////////////////////
+// RS Framework
+///////////////////////////////////////
+
+Value* ProfilerRS::Translate(Value* v) {
+  if(TransCache[v])
+    return TransCache[v];
+  
+  if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
+    if (bb == &bb->getParent()->getEntryBlock())
+      TransCache[bb] = bb; //don't translate entry block
+    else
+      TransCache[bb] = new BasicBlock("dup_" + bb->getName(), bb->getParent(), 
+                                      NULL);
+    return TransCache[bb];
+  } else if (Instruction* i = dyn_cast<Instruction>(v)) {
+    //we have already translated this
+    //do not translate entry block allocas
+    if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
+      TransCache[i] = i;
+      return i;
+    } else {
+      //translate this
+      Instruction* i2 = i->clone();
+      if (i->hasName())
+        i2->setName("dup_" + i->getName());
+      TransCache[i] = i2;
+      //NumNewInst++;
+      for (unsigned x = 0; x < i2->getNumOperands(); ++x)
+        i2->setOperand(x, Translate(i2->getOperand(x)));
+      return i2;
+    }
+  } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
+    TransCache[v] = v;
+    return v;
+  }
+  assert(0 && "Value not handled");
+  return 0;
+}
+
+void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
+{
+  //perform a breadth first search, building up a duplicate of the code
+  std::queue<BasicBlock*> worklist;
+  std::set<BasicBlock*> seen;
+  
+  //This loop ensures proper BB order, to help performance
+  for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
+    worklist.push(fib);
+  while (!worklist.empty()) {
+    Translate(worklist.front());
+    worklist.pop();
+  }
+  
+  //remember than reg2mem created a new entry block we don't want to duplicate
+  worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
+  seen.insert(&F.getEntryBlock());
+  
+  while (!worklist.empty()) {
+    BasicBlock* bb = worklist.front();
+    worklist.pop();
+    if(seen.find(bb) == seen.end()) {
+      BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
+      BasicBlock::InstListType& instlist = bbtarget->getInstList();
+      for (BasicBlock::iterator iib = bb->begin(), iie = bb->end(); 
+           iib != iie; ++iib) {
+        //NumOldInst++;
+        if (!LI.isProfiling(&*iib)) {
+          Instruction* i = cast<Instruction>(Translate(iib));
+          instlist.insert(bbtarget->end(), i);
+        }
+      }
+      //updated search state;
+      seen.insert(bb);
+      TerminatorInst* ti = bb->getTerminator();
+      for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
+        BasicBlock* bbs = ti->getSuccessor(x);
+        if (seen.find(bbs) == seen.end()) {
+          worklist.push(bbs);
+        }
+      }
+    }
+  }
+}
+
+void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
+  //given a backedge from B -> A, and translations A' and B',
+  //a: insert C and C'
+  //b: add branches in C to A and A' and in C' to A and A'
+  //c: mod terminators@B, replace A with C
+  //d: mod terminators@B', replace A' with C'
+  //e: mod phis@A for pred B to be pred C
+  //       if multiple entries, simplify to one
+  //f: mod phis@A' for pred B' to be pred C'
+  //       if multiple entries, simplify to one
+  //g: for all phis@A with pred C using x
+  //       add in edge from C' using x'
+  //       add in edge from C using x in A'
+  
+  //a:
+  Function::iterator BBN = src; ++BBN;
+  BasicBlock* bbC = new BasicBlock("choice", &F, BBN);
+  //ChoicePoints.insert(bbC);
+  BBN = cast<BasicBlock>(Translate(src));
+  BasicBlock* bbCp = new BasicBlock("choice", &F, ++BBN);
+  ChoicePoints.insert(bbCp);
+  
+  //b:
+  new BranchInst(cast<BasicBlock>(Translate(dst)), bbC);
+  new BranchInst(dst, cast<BasicBlock>(Translate(dst)), 
+                 ConstantInt::get(Type::Int1Ty, true), bbCp);
+  //c:
+  {
+    TerminatorInst* iB = src->getTerminator();
+    for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
+      if (iB->getSuccessor(x) == dst)
+        iB->setSuccessor(x, bbC);
+  }
+  //d:
+  {
+    TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
+    for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
+      if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
+        iBp->setSuccessor(x, bbCp);
+  }
+  //e:
+  ReplacePhiPred(dst, src, bbC);
+  //src could be a switch, in which case we are replacing several edges with one
+  //thus collapse those edges int the Phi
+  CollapsePhi(dst, bbC);
+  //f:
+  ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
+                 cast<BasicBlock>(Translate(src)),bbCp);
+  CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
+  //g:
+  for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
+      ++ib)
+    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
+      for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
+        if(bbC == phi->getIncomingBlock(x)) {
+          phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
+          cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x), 
+                                                     bbC);
+        }
+      phi->removeIncomingValue(bbC);
+    }
+}
+
+bool ProfilerRS::runOnFunction(Function& F) {
+  if (!F.isDeclaration()) {
+    std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
+    RSProfilers& LI = getAnalysis<RSProfilers>();
+    
+    getBackEdges(F, BackEdges);
+    Duplicate(F, LI);
+    //assume that stuff worked.  now connect the duplicated basic blocks 
+    //with the originals in such a way as to preserve ssa.  yuk!
+    for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator 
+           ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
+      ProcessBackEdge(ib->first, ib->second, F);
+    
+    //oh, and add the edge from the reg2mem created entry node to the 
+    //duplicated second node
+    TerminatorInst* T = F.getEntryBlock().getTerminator();
+    ReplaceInstWithInst(T, new BranchInst(T->getSuccessor(0),
+                                          cast<BasicBlock>(
+                                            Translate(T->getSuccessor(0))),
+                                          ConstantInt::get(Type::Int1Ty, true)));
+    
+    //do whatever is needed now that the function is duplicated
+    c->PrepFunction(&F);
+    
+    //add entry node to choice points
+    ChoicePoints.insert(&F.getEntryBlock());
+    
+    for (std::set<BasicBlock*>::iterator 
+           ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
+      c->ProcessChoicePoint(*ii);
+    
+    ChoicePoints.clear();
+    TransCache.clear();
+    
+    return true;
+  }
+  return false;
+}
+
+bool ProfilerRS::doInitialization(Module &M) {
+  switch (RandomMethod) {
+  case GBV:
+    c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1);
+    break;
+  case GBVO:
+    c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1);
+    break;
+  case HOSTCC:
+    c = new CycleCounter(M, (1 << 14) - 1);
+    break;
+  };
+  return true;
+}
+
+void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<RSProfilers>();
+  AU.addRequiredID(DemoteRegisterToMemoryID);
+}
+
+///////////////////////////////////////
+// Utilities:
+///////////////////////////////////////
+static void ReplacePhiPred(BasicBlock* btarget, 
+                           BasicBlock* bold, BasicBlock* bnew) {
+  for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
+      ib != ie; ++ib)
+    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
+      for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
+        if(bold == phi->getIncomingBlock(x))
+          phi->setIncomingBlock(x, bnew);
+    }
+}
+
+static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
+  for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
+      ib != ie; ++ib)
+    if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
+      std::map<BasicBlock*, Value*> counter;
+      for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
+        if (counter[phi->getIncomingBlock(i)]) {
+          assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
+          phi->removeIncomingValue(i, false);
+        } else {
+          counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
+          ++i;
+        }
+      }
+    } 
+}
+
+template<class T>
+static void recBackEdge(BasicBlock* bb, T& BackEdges, 
+                        std::map<BasicBlock*, int>& color,
+                        std::map<BasicBlock*, int>& depth,
+                        std::map<BasicBlock*, int>& finish,
+                        int& time)
+{
+  color[bb] = 1;
+  ++time;
+  depth[bb] = time;
+  TerminatorInst* t= bb->getTerminator();
+  for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
+    BasicBlock* bbnew = t->getSuccessor(i);
+    if (color[bbnew] == 0)
+      recBackEdge(bbnew, BackEdges, color, depth, finish, time);
+    else if (color[bbnew] == 1) {
+      BackEdges.insert(std::make_pair(bb, bbnew));
+      //NumBackEdges++;
+    }
+  }
+  color[bb] = 2;
+  ++time;
+  finish[bb] = time;
+}
+
+
+
+//find the back edges and where they go to
+template<class T>
+static void getBackEdges(Function& F, T& BackEdges) {
+  std::map<BasicBlock*, int> color;
+  std::map<BasicBlock*, int> depth;
+  std::map<BasicBlock*, int> finish;
+  int time = 0;
+  recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
+  DOUT << F.getName() << " " << BackEdges.size() << "\n";
+}
+
+
+//Creation functions
+ModulePass* llvm::createNullProfilerRSPass() {
+  return new NullProfilerRS();
+}
+
+FunctionPass* llvm::createRSProfilingPass() {
+  return new ProfilerRS();
+}