Added new circuit finding algorithm.
Fixed a bug in the graph so that iteration-difference (ite diff) edges for PHI nodes are added.
llvm-svn: 20108
diff --git a/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp b/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp
index 5a415e7..376cbfd 100644
--- a/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp
+++ b/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp
@@ -14,6 +14,9 @@
 
 #include "MSchedGraph.h"
 #include "../SparcV9RegisterInfo.h"
+#include "../MachineCodeForInstruction.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
@@ -30,6 +33,17 @@
   graph->addNode(inst, this);
 }
 
+//Copy constructor: duplicates the scalar fields and both edge lists of N.
+//The copied edges still refer to the same neighbor nodes as N's edges do.
+MSchedGraphNode::MSchedGraphNode(const MSchedGraphNode &N)
+  : Predecessors(N.Predecessors), Successors(N.Successors) {
+  isBranchInstr = N.isBranchInstr;
+  latency = N.latency;
+  index = N.index;
+  Parent = N.Parent;
+  Inst = N.Inst;
+}
+
 void MSchedGraphNode::print(std::ostream &os) const {
   os << "MSchedGraphNode: Inst=" << *Inst << ", latency= " << latency << "\n"; 
 }
@@ -46,6 +60,16 @@
   abort();
 }
 
+//Return the iteration difference of the first out edge whose destination
+//is succ, or 0 when this node has no edge to succ.
+unsigned MSchedGraphNode::getIteDiff(MSchedGraphNode *succ) {
+  for(unsigned i = 0, e = Successors.size(); i != e; ++i)
+    if(Successors[i].getDest() == succ)
+      return Successors[i].getIteDiff();
+  return 0;
+}
+
+
 unsigned MSchedGraphNode::getInEdgeNum(MSchedGraphNode *pred) {
   //Loop over all the successors of our predecessor
   //return the edge the corresponds to this in edge
@@ -85,6 +109,19 @@
   GraphMap[MI] = node;
 }
 
+//Detach node from all of its predecessors and drop it from GraphMap.
+//The node object itself is not freed here.
+void MSchedGraph::deleteNode(MSchedGraphNode *node) {
+  //deleteSuccessor() erases entries from node's own predecessor list, which
+  //would invalidate iterators into that list, so walk a snapshot instead.
+  std::vector<MSchedGraphNode*> preds(node->pred_begin(), node->pred_end());
+  for(unsigned i = 0; i < preds.size(); ++i)
+    preds[i]->deleteSuccessor(node);
+
+  //Remove this node from the graph
+  GraphMap.erase(node->getInst());
+}
+
 MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ)
   : BB(bb), Target(targ) {
   
@@ -97,6 +134,41 @@
   buildNodesAndEdges();
 }
 
+//Copy constructor: clones every node of G into this graph and rewires the
+//clones' edges so they point at other clones instead of at G's nodes.
+//On return, newNodes maps each cloned node to the node of G it was copied
+//from (entries are added to whatever the map already holds).
+MSchedGraph::MSchedGraph(const MSchedGraph &G, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) 
+  : BB(G.BB), Target(G.Target) {
+
+  //Original node -> its clone; only needed while the edges are rewired
+  std::map<MSchedGraphNode*, MSchedGraphNode*> oldToNew;
+  //Pass 1: clone each node, keyed by the same MachineInstr as in G
+  for(MSchedGraph::const_iterator src = G.GraphMap.begin(), srcEnd = G.GraphMap.end();
+      src != srcEnd; ++src) {
+    MSchedGraphNode *original = src->second;
+    MSchedGraphNode *clone = new MSchedGraphNode(*original);
+    oldToNew[original] = clone;
+    newNodes[clone] = original;
+    GraphMap[src->first] = clone;
+  }
+
+  //Pass 2: the clones still carry G's pointers; swap each for its clone
+  for(MSchedGraph::iterator dst = GraphMap.begin(), dstEnd = GraphMap.end(); dst != dstEnd; ++dst) {
+    MSchedGraphNode *clone = dst->second;
+    clone->setParent(this);
+
+    for(unsigned p = 0; p < clone->pred_size(); ++p)
+      clone->setPredecessor(p, oldToNew[clone->getPredecessor(p)]);
+
+    for(unsigned s = 0; s < clone->succ_size(); ++s) {
+      MSchedGraphEdge *outEdge = clone->getSuccessor(s);
+      outEdge->setDest(oldToNew[outEdge->getDest()]);
+    }
+  }
+}
+
+
 MSchedGraph::~MSchedGraph () {
   for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I)
     delete I->second;
@@ -114,12 +186,13 @@
   //Save PHI instructions to deal with later
   std::vector<const MachineInstr*> phiInstrs;
   unsigned index = 0;
+
   //Loop over instructions in MBB and add nodes and edges
   for (MachineBasicBlock::const_iterator MI = BB->begin(), e = BB->end(); MI != e; ++MI) {
     //Get each instruction of machine basic block, get the delay
     //using the op code, create a new node for it, and add to the
     //graph.
-    
+   
     MachineOpCode opCode = MI->getOpcode();
     int delay;
 
@@ -138,9 +211,8 @@
     if(MTI->isNop(opCode))
       continue;
     
-    //Add PHI to phi instruction list to be processed later
-    if (opCode == TargetInstrInfo::PHI)
-      phiInstrs.push_back(MI);
+    //Sparc BE does not use PHI opcode, so assert on this case
+    assert(opCode != TargetInstrInfo::PHI && "Did not expect PHI opcode");
 
     bool isBranch = false;
 
@@ -185,8 +257,10 @@
 	assert((mOp.getVRegValue() != NULL) && "Null value is defined");
 
 	//Check if this is a read operation in a phi node, if so DO NOT PROCESS
-	if(mOp.isUse() && (opCode == TargetInstrInfo::PHI))
+	if(mOp.isUse() && (opCode == TargetInstrInfo::PHI)) {
+	  DEBUG(std::cerr << "Read Operation in a PHI node\n");
 	  continue;
+	}
 
       
 	if (const Value* srcI = mOp.getVRegValue()) {
@@ -213,14 +287,37 @@
     }
     ++index;
   }
+
+  //Loop over LLVM BB, examine phi instructions, and add them to our phiInstr list to process
+  const BasicBlock *llvm_bb = BB->getBasicBlock();
+  for(BasicBlock::const_iterator I = llvm_bb->begin(), E = llvm_bb->end(); I != E; ++I) {
+    if(const PHINode *PN = dyn_cast<PHINode>(I)) {
+      MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(PN);
+       for (unsigned j = 0; j < tempMvec.size(); j++) {
+	 DEBUG(std::cerr << "Inserting phi instr into map: " << *tempMvec[j] << "\n");
+	 phiInstrs.push_back((MachineInstr*) tempMvec[j]);
+       }
+    }
+
+  }
+
   addMemEdges(memInstructions);
   addMachRegEdges(regNumtoNodeMap);
 
   //Finally deal with PHI Nodes and Value*
   for(std::vector<const MachineInstr*>::iterator I = phiInstrs.begin(), E = phiInstrs.end(); I != E;  ++I) {
+
     //Get Node for this instruction
-    MSchedGraphNode *node = find(*I)->second;
-  
+    std::map<const MachineInstr*, MSchedGraphNode*>::iterator X;
+    X = find(*I);
+
+    if(X == GraphMap.end())
+      continue;
+
+    MSchedGraphNode *node = X->second;
+
+    DEBUG(std::cerr << "Adding ite diff edges for node: " << *node << "\n");
+
     //Loop over operands for this instruction and add value edges
     for(unsigned i=0; i < (*I)->getNumOperands(); ++i) {
       //Get Operand
@@ -258,13 +355,14 @@
 
     //Node is a Def, so add output dep.
     if(nodeIsDef) {
-      if(mOp.isUse())
+      if(mOp.isUse()) {
 	srcNode->addOutEdge(destNode, MSchedGraphEdge::ValueDep, 
 			    MSchedGraphEdge::AntiDep, diff);
-      if(mOp.isDef())
+      }
+      if(mOp.isDef()) {
 	srcNode->addOutEdge(destNode, MSchedGraphEdge::ValueDep, 
 			    MSchedGraphEdge::OutputDep, diff);
-      
+      }
     }
     if(nodeIsUse) {
       if(mOp.isDef())
diff --git a/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h b/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h
index 36b7632..4a341ef 100644
--- a/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h
+++ b/llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h
@@ -41,6 +41,7 @@
     MSchedGraphNode *getDest() const { return dest; }
     unsigned getIteDiff() { return iteDiff; }
     unsigned getDepOrderType() { return depOrderType; }
+    void setDest(MSchedGraphNode *newDest) { dest = newDest; }
 
   private:
     friend class MSchedGraphNode;
@@ -70,15 +71,18 @@
     MSchedGraphNode(const MachineInstr *inst, MSchedGraph *graph, 
 		    unsigned index, unsigned late=0, bool isBranch=false);
 
+    MSchedGraphNode(const MSchedGraphNode &N);
+
     //Iterators
     typedef std::vector<MSchedGraphNode*>::iterator pred_iterator;
     pred_iterator pred_begin() { return Predecessors.begin(); }
     pred_iterator pred_end() { return Predecessors.end(); }
-    
+    unsigned pred_size() { return Predecessors.size(); }
+
     typedef std::vector<MSchedGraphNode*>::const_iterator pred_const_iterator;
     pred_const_iterator pred_begin() const { return Predecessors.begin(); }
     pred_const_iterator pred_end() const { return Predecessors.end(); }
-
+    
     // Successor iterators.
     typedef MSchedGraphNodeIterator<std::vector<MSchedGraphEdge>::const_iterator,
 				    const MSchedGraphNode> succ_const_iterator;
@@ -89,8 +93,32 @@
 				    MSchedGraphNode> succ_iterator;
     succ_iterator succ_begin();
     succ_iterator succ_end();
+  
+    unsigned succ_size() { return Successors.size(); }
 
-    
+    void setPredecessor(unsigned index, MSchedGraphNode *dest) {  //overwrite predecessor slot `index` with dest
+      Predecessors[index] = dest;  //no bounds check on index
+    }
+
+    MSchedGraphNode* getPredecessor(unsigned index) {  //predecessor stored in slot `index`
+      return Predecessors[index];  //no bounds check on index
+    }
+
+    MSchedGraphEdge* getSuccessor(unsigned index) {  //out edge stored in slot `index`
+      return &Successors[index];  //points into the Successors vector; no bounds check on index
+    }
+
+    //Remove every out edge from this node to `node`; for each edge removed,
+    //drop one matching entry from node's predecessor list if one exists.
+    void deleteSuccessor(MSchedGraphNode *node) {
+      for (unsigned i = 0; i != Successors.size(); ) {
+	if (Successors[i].getDest() != node) { ++i; continue; }
+	Successors.erase(Successors.begin()+i);  //next edge slides into slot i
+	pred_iterator P = std::find(node->pred_begin(), node->pred_end(), this);
+	if (P != node->pred_end())  //erase(end()) would be undefined behavior
+	  node->Predecessors.erase(P);
+      }
+    }
 
     void addOutEdge(MSchedGraphNode *destination, 
 		    MSchedGraphEdge::MSchedGraphEdgeType type, 
@@ -105,15 +133,15 @@
     unsigned getLatency() { return latency; }
     unsigned getLatency() const { return latency; }
     unsigned getIndex() { return index; }
+    unsigned getIteDiff(MSchedGraphNode *succ);
     MSchedGraphEdge getInEdge(MSchedGraphNode *pred);
     unsigned getInEdgeNum(MSchedGraphNode *pred);
-
     bool isSuccessor(MSchedGraphNode *);
     bool isPredecessor(MSchedGraphNode *);
     bool isBranch() { return isBranchInstr; }
     //Debug support
     void print(std::ostream &os) const;
-
+    void setParent(MSchedGraph *p) { Parent = p; }
   };
 
   template<class IteratorType, class NodeType>
@@ -172,6 +200,24 @@
   }
 
 
+  // GraphTraits specialization for a single scheduling-graph node: the
+  // node handed in is the entry node and its children are its successors,
+  // which lets generic graph iterators walk everything reachable from it.
+  template <> struct GraphTraits<MSchedGraphNode*> {
+    typedef MSchedGraphNode NodeType;
+    typedef MSchedGraphNode::succ_iterator ChildIteratorType;
+    // The walk starts at the node that was passed in.
+    static NodeType *getEntryNode(NodeType *Start) { return Start; }
+    // Child range of a node is its successor range.
+    static ChildIteratorType child_begin(NodeType *Node) {
+      return Node->succ_begin();
+    }
+    static ChildIteratorType child_end(NodeType *Node) {
+      return Node->succ_end();
+    }
+  };
+  
+
 
   class MSchedGraph {
     
@@ -193,11 +239,13 @@
 
   public:
     MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ);
+    MSchedGraph(const MSchedGraph &G, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes);
     ~MSchedGraph();
     
     //Add Nodes to the Graph
     void addNode(const MachineInstr* MI, MSchedGraphNode *node);
-    
+    void deleteNode(MSchedGraphNode *node);
+
     //iterators 
     typedef std::map<const MachineInstr*, MSchedGraphNode*>::iterator iterator;
     typedef std::map<const MachineInstr*, MSchedGraphNode*>::const_iterator const_iterator;
@@ -205,9 +253,11 @@
     iterator find(const MachineInstr* I) { return GraphMap.find(I); }
     iterator end() { return GraphMap.end(); }
     iterator begin() { return GraphMap.begin(); }
+    unsigned size() { return GraphMap.size(); }
     reverse_iterator rbegin() { return GraphMap.rbegin(); }
     reverse_iterator rend() { return GraphMap.rend(); }
     const TargetMachine* getTarget() { return &Target; }
+    const MachineBasicBlock* getBB() { return BB; }
   };
 
   
@@ -242,14 +292,13 @@
     static nodes_iterator nodes_end(MSchedGraph *G) {
       return map_iterator(((MSchedGraph*)G)->end(), DerefFun(getSecond));
     }
-    
 
   };
   
   template <> struct GraphTraits<const MSchedGraph*> {
     typedef const MSchedGraphNode NodeType;
     typedef MSchedGraphNode::succ_const_iterator ChildIteratorType;
-    
+   
     static inline ChildIteratorType child_begin(NodeType *N) { 
       return N->succ_begin(); 
     }
diff --git a/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp b/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp
index f049bf0..15dc5b3 100644
--- a/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp
+++ b/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp
@@ -23,8 +23,10 @@
 #include "llvm/Target/TargetSchedInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Timer.h"
 #include <cmath>
 #include <algorithm>
 #include <fstream>
@@ -60,11 +62,21 @@
   O << "\n";
 };
 
+
+#if 1  //change to 0 to compile the region timers out
+#define TIME_REGION(VARNAME, DESC) \
+   NamedRegionTimer VARNAME(DESC)
+#else  //timers disabled: TIME_REGION expands to nothing
+#define TIME_REGION(VARNAME, DESC)
+#endif
+
+
 //Graph Traits for printing out the dependence graph
 namespace llvm {
   Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled");
   Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
   Statistic<> IncreasedII("modulosched-increasedII", "Number of times we had to increase II");
+  Statistic<> SingleBBLoops("modulosched-singleBBLoops", "Number of single basic block loops");
 
   template<>
   struct DOTGraphTraits<MSchedGraph*> : public DefaultDOTGraphTraits {
@@ -151,25 +163,40 @@
   //Iterate over the worklist and perform scheduling
   for(std::vector<MachineBasicBlock*>::iterator BI = Worklist.begin(),  
 	BE = Worklist.end(); BI != BE; ++BI) {
-    
-    CreateDefMap(*BI);
+
+    //Print out BB for debugging
+    DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr));
+
+    //Catch the odd case where we only have TmpInstructions and no real Value*s
+    if(!CreateDefMap(*BI)) {
+      //Clear out our maps for the next basic block that is processed
+      nodeToAttributesMap.clear();
+      partialOrder.clear();
+      recurrenceList.clear();
+      FinalNodeOrder.clear();
+      schedule.clear();
+      defMap.clear();
+      continue;
+    }
 
     MSchedGraph *MSG = new MSchedGraph(*BI, target);
     
     //Write Graph out to file
     DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG));
     
-    //Print out BB for debugging
-    DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr));
-    
     //Calculate Resource II
     int ResMII = calculateResMII(*BI);
     
     //Calculate Recurrence II
     int RecMII = calculateRecMII(MSG, ResMII);
+
+    DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n");
+       
     
+
+
     //Our starting initiation interval is the maximum of RecMII and ResMII
-    /*II = std::max(RecMII, ResMII);
+    II = std::max(RecMII, ResMII);
     
     //Print out II, RecMII, and ResMII
     DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n");
@@ -177,10 +204,10 @@
     //Dump node properties if in debug mode
     DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I =  nodeToAttributesMap.begin(), 
 		E = nodeToAttributesMap.end(); I !=E; ++I) {
-      std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 
-		<< I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 
-		<< " Height: " << I->second.height << "\n";
-    });
+	    std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 
+		      << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 
+		      << " Height: " << I->second.height << "\n";
+	  });
 
     //Calculate Node Properties
     calculateNodeAttributes(MSG, ResMII);
@@ -188,10 +215,10 @@
     //Dump node properties if in debug mode
     DEBUG(for(std::map<MSchedGraphNode*, MSNodeAttributes>::iterator I =  nodeToAttributesMap.begin(), 
 		E = nodeToAttributesMap.end(); I !=E; ++I) {
-      std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 
-		<< I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 
-		<< " Height: " << I->second.height << "\n";
-    });
+	    std::cerr << "Node: " << *(I->first) << " ASAP: " << I->second.ASAP << " ALAP: " 
+		      << I->second.ALAP << " MOB: " << I->second.MOB << " Depth: " << I->second.depth 
+		      << " Height: " << I->second.height << "\n";
+	  });
     
     //Put nodes in order to schedule them
     computePartialOrder();
@@ -199,18 +226,18 @@
     //Dump out partial order
     DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(), 
 		E = partialOrder.end(); I !=E; ++I) {
-      std::cerr << "Start set in PO\n";
-      for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
-	std::cerr << "PO:" << **J << "\n";
-    });
+	    std::cerr << "Start set in PO\n";
+	    for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
+	      std::cerr << "PO:" << **J << "\n";
+	  });
     
     //Place nodes in final order
     orderNodes();
     
     //Dump out order of nodes
     DEBUG(for(std::vector<MSchedGraphNode*>::iterator I = FinalNodeOrder.begin(), E = FinalNodeOrder.end(); I != E; ++I) {
-	  std::cerr << "FO:" << **I << "\n";
-    });
+	    std::cerr << "FO:" << **I << "\n";
+	  });
     
     //Finally schedule nodes
     bool haveSched = computeSchedule();
@@ -227,7 +254,7 @@
     }
     else
       DEBUG(std::cerr << "Max stage is 0, so no change in loop or reached cap\n");
-    */
+    
     //Clear out our maps for the next basic block that is processed
     nodeToAttributesMap.clear();
     partialOrder.clear();
@@ -249,7 +276,7 @@
   return Changed;
 }
 
-void ModuloSchedulingPass::CreateDefMap(MachineBasicBlock *BI) {
+bool ModuloSchedulingPass::CreateDefMap(MachineBasicBlock *BI) {
   defaultInst = 0;
 
   for(MachineBasicBlock::iterator I = BI->begin(), E = BI->end(); I != E; ++I) {
@@ -257,7 +284,7 @@
       const MachineOperand &mOp = I->getOperand(opNum);
       if(mOp.getType() == MachineOperand::MO_VirtualRegister && mOp.isDef()) {
 	//assert if this is the second def we have seen
-	DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n"); 
+	//DEBUG(std::cerr << "Putting " << *(mOp.getVRegValue()) << " into map\n"); 
 	assert(!defMap.count(mOp.getVRegValue()) && "Def already in the map");
 
 	defMap[mOp.getVRegValue()] = &*I;
@@ -272,7 +299,10 @@
     }
   }
   
-  assert(defaultInst && "We must have a default instruction to use as our main point to add to machine code for instruction\n");
+  if(!defaultInst)
+    return false;
+  
+  return true;
   
 }
 /// This function checks if a Machine Basic Block is valid for modulo
@@ -302,6 +332,10 @@
   if(BI->getBasicBlock()->size() == 1)
     return false;
 
+
+  //Increase number of single basic block loops for stats
+  ++SingleBBLoops;
+
   //Get Target machine instruction info
   const TargetInstrInfo *TMI = target.getInstrInfo();
     
@@ -311,6 +345,15 @@
     MachineOpCode OC = I->getOpcode();
     if(TMI->isCall(OC))
       return false;
+    //Look for conditional move
+    if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi 
+       || OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi
+       || OC == V9::MOVRGZr || OC == V9::MOVRGZi || OC == V9::MOVRGEZr 
+       || OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr
+       || OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi
+       || OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi
+       || OC == V9::MOVFNEr || OC == V9::MOVFNEi)
+      return false;
   }
   return true;
 }
@@ -321,6 +364,8 @@
 //for each instruction
 int ModuloSchedulingPass::calculateResMII(const MachineBasicBlock *BI) {
   
+  TIME_REGION(X, "calculateResMII");
+
   const TargetInstrInfo *mii = target.getInstrInfo();
   const TargetSchedInfo *msi = target.getSchedInfo();
 
@@ -381,22 +426,22 @@
 /// calculateRecMII - Calculates the value of the highest recurrence
 /// By value we mean the total latency
 int ModuloSchedulingPass::calculateRecMII(MSchedGraph *graph, int MII) {
-  std::vector<MSchedGraphNode*> vNodes;
+  /*std::vector<MSchedGraphNode*> vNodes;
   //Loop over all nodes in the graph
   for(MSchedGraph::iterator I = graph->begin(), E = graph->end(); I != E; ++I) {
     findAllReccurrences(I->second, vNodes, MII);
     vNodes.clear();
-  }
+  }*/
+  //The per-node recurrence walk above is disabled; findAllCircuits() is called instead
+  TIME_REGION(X, "calculateRecMII");
 
+  findAllCircuits(graph, MII);
   int RecMII = 0;
   
   for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::iterator I = recurrenceList.begin(), E=recurrenceList.end(); I !=E; ++I) {
-    DEBUG(for(std::vector<MSchedGraphNode*>::const_iterator N = I->second.begin(), NE = I->second.end(); N != NE; ++N) {
-      std::cerr << **N << "\n";
-    });
     RecMII = std::max(RecMII, I->first);
   }
-    
+  //NOTE(review): RecMII computed above is not returned; the MII argument is passed back unchanged - confirm intended
   return MII;
 }
 
@@ -405,6 +450,8 @@
 /// MOB.
 void ModuloSchedulingPass::calculateNodeAttributes(MSchedGraph *graph, int MII) {
 
+  TIME_REGION(X, "calculateNodeAttributes");
+
   assert(nodeToAttributesMap.empty() && "Node attribute map was not cleared");
 
   //Loop over the nodes and add them to the map
@@ -678,11 +725,234 @@
   
 }
 
+int CircCount;
+
+//Unblock step of the circuit search: clear u's blocked mark, then drain
+//u's B-list, recursively unblocking each drained node that is still blocked.
+void ModuloSchedulingPass::unblock(MSchedGraphNode *u, std::set<MSchedGraphNode*> &blocked,
+	     std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B) {
+
+  DEBUG(std::cerr << "Unblocking: " << *u << "\n");
+  blocked.erase(u);
+
+  //A std::map reference stays valid while nested calls add other keys to B
+  std::set<MSchedGraphNode*> &waiting = B[u];
+  while(!waiting.empty()) {
+    MSchedGraphNode *next = *waiting.begin();
+    waiting.erase(waiting.begin());
+    DEBUG(std::cerr << "Removed: " << *next << "from B-List\n");
+    if(blocked.count(next) != 0)
+      unblock(next, blocked, B);
+  }
+}
+//Recursive circuit search: push v on stack, record every circuit that closes at s, return true if one was found.
+bool ModuloSchedulingPass::circuit(MSchedGraphNode *v, std::vector<MSchedGraphNode*> &stack, 
+	     std::set<MSchedGraphNode*> &blocked, std::vector<MSchedGraphNode*> &SCC, 
+	     MSchedGraphNode *s, std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B, 
+				   int II, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
+  bool f = false;
+  //f becomes true once a circuit through v back to s has been found
+  DEBUG(std::cerr << "Finding Circuits Starting with: ( " << v << ")"<< *v << "\n");
+
+  //Push node onto the stack
+  stack.push_back(v);
+
+  //block this node
+  blocked.insert(v);
+
+  //Collect the successors of v that are members of SCC (the adjacency set AkV)
+  std::set<MSchedGraphNode*> AkV;
+  for(MSchedGraphNode::succ_iterator I = v->succ_begin(), E = v->succ_end(); I != E; ++I) {
+    if((std::find(SCC.begin(), SCC.end(), *I) != SCC.end())) {
+      AkV.insert(*I);
+    }
+  }
+  //Each member of AkV either closes a circuit at s, is searched recursively, or is currently blocked
+  for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) {
+    if(*I == s) {
+      //We have a circuit, so add it to our list
+      //recc collects, for each stack entry, the node that newNodes maps it to
+      std::vector<MSchedGraphNode*> recc;
+      //Dump recurrence for now
+      DEBUG(std::cerr << "Starting Recc\n");
+	
+      int totalDelay = 0;
+      int totalDistance = 0;
+      MSchedGraphNode *lastN = 0;
+
+      //Loop over recurrence, get delay and distance
+      for(std::vector<MSchedGraphNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) {
+	totalDelay += (*N)->getLatency();
+	if(lastN) {
+	  totalDistance += (*N)->getInEdge(lastN).getIteDiff();
+	}
+
+	//Remember this node as the previous one and store its newNodes counterpart
+	lastN = *N;
+	recc.push_back(newNodes[*N]);
+
+	DEBUG(std::cerr << *lastN << "\n");
+      }
+
+      //Add the distance of the edge closing the circuit (last stack node back to the first)
+      totalDistance += lastN->getIteDiff(*stack.begin());
+
+      DEBUG(std::cerr << "End Recc\n");
+      f = true;
+      CircCount++;
+
+      //Record the closing edge, expressed on the newNodes counterparts, in edgesToIgnore
+      DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n");
+      edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN])));
+	
+      //Lower RecMII from II for as long as delay - RecMII*distance <= 0 still holds
+      int RecMII = II; //Starting value
+      int value = totalDelay-(RecMII * totalDistance);
+      int lastII = II;
+      while(value <= 0) {
+	  //NOTE(review): if totalDistance is 0 and totalDelay <= 0, value never changes and this loop cannot exit - confirm that cannot occur
+	lastII = RecMII;
+	RecMII--;
+	value = totalDelay-(RecMII * totalDistance);
+      }
+      //lastII is the last RecMII for which the inequality held, or II if it never held
+      recurrenceList.insert(std::make_pair(lastII, recc));
+
+    }
+    else if(!blocked.count(*I)) {
+      if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes))
+	f = true;
+    }
+    else
+      DEBUG(std::cerr << "Blocked: " << **I << "\n");
+  }
+
+  //Circuit found: unblock v. Otherwise add v to the B-list of every member of AkV.
+  if(f) {
+    unblock(v, blocked, B);
+  }
+  else {
+    for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) 
+      B[*I].insert(v);
+
+  }
+
+  //Pop v
+  stack.pop_back();
+
+  return f;
+
+}
+
+//Enumerate the circuits of g and record each one in recurrenceList.
+//Works on a private copy of g: each round picks the multi-node SCC holding
+//the lowest-addressed node s, runs circuit() from s, then removes s and
+//every node with a lower address before searching again.
+void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) {
+  CircCount = 0;
+
+  //Maps each node of the working copy back to the node of g it came from
+  std::map<MSchedGraphNode*, MSchedGraphNode*> newNodes;
+
+  //copy the graph
+  MSchedGraph *MSG = new MSchedGraph(*g, newNodes);
+
+  DEBUG(std::cerr << "Finding All Circuits\n");
+
+  //Set of blocked nodes
+  std::set<MSchedGraphNode*> blocked;
+
+  //Stack holding current circuit
+  std::vector<MSchedGraphNode*> stack;
+
+  //Map for B Lists
+  std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > B;
+
+  //Nodes taken out of MSG. deleteNode() does not free them, so remember
+  //them (once each) and release them together with MSG at the end
+  std::set<MSchedGraphNode*> removedNodes;
+
+  //Iterate over the graph until its down to one node or empty
+  while(MSG->size() > 1) {
+    DEBUG(std::cerr << "Graph Size: " << MSG->size() << "\n");
+    DEBUG(std::cerr << "Finding strong component Vk with least vertex\n");
+
+    //Iterate over all the SCCs in the graph
+    std::set<MSchedGraphNode*> Visited;
+    std::vector<MSchedGraphNode*> Vk;
+    MSchedGraphNode* s = 0;
+
+    //Find scc with the least vertex
+    for (MSchedGraph::iterator GI = MSG->begin(), E = MSG->end(); GI != E; ++GI)
+      if (Visited.insert(GI->second).second) {
+	for (scc_iterator<MSchedGraphNode*> SCCI = scc_begin(GI->second),
+	       E = scc_end(GI->second); SCCI != E; ++SCCI) {
+	  std::vector<MSchedGraphNode*> &nextSCC = *SCCI;
+	  if (Visited.insert(nextSCC[0]).second) {
+	    Visited.insert(nextSCC.begin()+1, nextSCC.end());
+	    DEBUG(std::cerr << "SCC size: " << nextSCC.size() << "\n");
+
+	    //Skip single-node SCCs
+	    if(nextSCC.size() > 1) {
+	      //Get least vertex in Vk
+	      if(!s) {
+		s = nextSCC[0];
+		Vk = nextSCC;
+	      }
+
+	      for(unsigned i = 0; i < nextSCC.size(); ++i) {
+		if(nextSCC[i] < s) {
+		  s = nextSCC[i];
+		  Vk = nextSCC;
+		}
+	      }
+	    }
+	  }
+	}
+      }
+
+    //Process SCC
+    DEBUG(for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end();
+	      N != NE; ++N) { std::cerr << *((*N)->getInst()); });
+
+    //Reset the blocked mark and B-list of every node in this scc
+    for(std::vector<MSchedGraphNode*>::iterator N = Vk.begin(), NE = Vk.end();
+	N != NE; ++N) {
+      blocked.erase(*N);
+      B[*N].clear();
+    }
+    if(Vk.size() > 1) {
+      circuit(s, stack, blocked, Vk, s, B, II, newNodes);
+
+      //Find all nodes up to s and delete them
+      std::vector<MSchedGraphNode*> nodesToRemove;
+      nodesToRemove.push_back(s);
+      for(MSchedGraph::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) {
+	if(N->second < s )
+	  nodesToRemove.push_back(N->second);
+      }
+      for(std::vector<MSchedGraphNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) {
+	DEBUG(std::cerr << "Deleting Node: " << **N << "\n");
+	MSG->deleteNode(*N);
+	removedNodes.insert(*N);
+      }
+    }
+    else
+      break;
+  }
+  DEBUG(std::cerr << "Num Circuits found: " << CircCount << "\n");
+  //Release the working copy: first the nodes detached from it, then the
+  //graph itself, whose destructor frees the nodes still in its map
+  for(std::set<MSchedGraphNode*>::iterator N = removedNodes.begin(), NE = removedNodes.end(); N != NE; ++N)
+    delete *N;
+  delete MSG;
+}
+
+
 void ModuloSchedulingPass::findAllReccurrences(MSchedGraphNode *node, 
 					       std::vector<MSchedGraphNode*> &visitedNodes,
 					       int II) {
-  if(node)
-    DEBUG(std::cerr << *(node->getInst()) << "\n");
+  
 
   if(std::find(visitedNodes.begin(), visitedNodes.end(), node) != visitedNodes.end()) {
     std::vector<MSchedGraphNode*> recurrence;
@@ -759,6 +1029,8 @@
 
 void ModuloSchedulingPass::computePartialOrder() {
 
+  TIME_REGION(X, "calculatePartialOrder");
+  
   //Only push BA branches onto the final node order, we put other branches after it
   //FIXME: Should we really be pushing branches on it a specific order instead of relying
   //on BA being there?
@@ -936,6 +1208,8 @@
 
 void ModuloSchedulingPass::orderNodes() {
   
+  TIME_REGION(X, "orderNodes");
+
   int BOTTOM_UP = 0;
   int TOP_DOWN = 1;
 
@@ -1151,6 +1425,8 @@
 
 bool ModuloSchedulingPass::computeSchedule() {
 
+  TIME_REGION(X, "computeSchedule");
+
   bool success = false;
   
   //FIXME: Should be set to max II of the original loop
@@ -1879,6 +2155,9 @@
 
 void ModuloSchedulingPass::reconstructLoop(MachineBasicBlock *BB) {
 
+  TIME_REGION(X, "reconstructLoop");
+
+
   DEBUG(std::cerr << "Reconstructing Loop\n");
 
   //First find the value *'s that we need to "save"
diff --git a/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h b/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h
index fa523d1..a8215d6 100644
--- a/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h
+++ b/llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h
@@ -67,7 +67,7 @@
     int II;
 
     //Internal functions
-    void CreateDefMap(MachineBasicBlock *BI);
+    bool CreateDefMap(MachineBasicBlock *BI);
     bool MachineBBisValid(const MachineBasicBlock *BI);
     int calculateResMII(const MachineBasicBlock *BI);
     int calculateRecMII(MSchedGraph *graph, int MII);
@@ -87,6 +87,16 @@
 			     std::vector<MSchedGraphNode*> &visitedNodes, int II);
     void addReccurrence(std::vector<MSchedGraphNode*> &recurrence, int II, MSchedGraphNode*, MSchedGraphNode*);
 
+    void findAllCircuits(MSchedGraph *MSG, int II);
+    bool circuit(MSchedGraphNode *v, std::vector<MSchedGraphNode*> &stack, 
+		 std::set<MSchedGraphNode*> &blocked, 
+		 std::vector<MSchedGraphNode*> &SCC, MSchedGraphNode *s,
+		 std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B, int II,
+		 std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes);
+    
+    void unblock(MSchedGraphNode *u, std::set<MSchedGraphNode*> &blocked,
+		 std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B);
+
     void computePartialOrder();
     bool computeSchedule();
     bool scheduleNode(MSchedGraphNode *node,