Initial checkin of Datastructure analysis.

Has bugs, but shouldn't crash in theory.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@1994 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/DataStructure/FunctionRepBuilder.cpp b/lib/Analysis/DataStructure/FunctionRepBuilder.cpp
new file mode 100644
index 0000000..19c406c
--- /dev/null
+++ b/lib/Analysis/DataStructure/FunctionRepBuilder.cpp
@@ -0,0 +1,331 @@
+//===- FunctionRepBuilder.cpp - Build the datastructure graph for a method --===//
+//
+// Build the local datastructure graph for a single method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FunctionRepBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/iMemory.h"
+#include "llvm/iPHINode.h"
+#include "llvm/iOther.h"
+#include "llvm/iTerminators.h"
+#include "llvm/DerivedTypes.h"
+#include "Support/STLExtras.h"
+#include <algorithm>
+
+// synthesizeNode - Return the shadow node synthesized for type Ty, creating
+// it and registering it with Rep the first time that type is requested.
+// FIXME: This should not take a FunctionRepBuilder as an argument!
+//
+ShadowDSNode *ShadowDSNode::synthesizeNode(const Type *Ty,
+                                           FunctionRepBuilder *Rep) {
+  // A derived shadow node always forwards the request to its parent...
+  if (ShadowParent != 0) return ShadowParent->synthesizeNode(Ty, Rep);
+
+  // Reuse the node previously synthesized for this type, if there is one...
+  for (unsigned Idx = 0, End = SynthNodes.size(); Idx < End; ++Idx)
+    if (Ty == SynthNodes[Idx].first) return SynthNodes[Idx].second;
+
+  // First request for this type: build the node, remember it, tell Rep...
+  ShadowDSNode *Fresh = new ShadowDSNode(Ty, Mod, this);
+  SynthNodes.push_back(make_pair(Ty, Fresh));
+  Rep->addShadowNode(Fresh);
+  return Fresh;
+}
+
+
+
+
+// visitOperand - Give a global value operand its own graph node the first
+// time it is seen, and queue the users of that global for processing...
+//
+void InitVisitor::visitOperand(Value *V) {
+  if (Rep->ValueMap.count(V) != 0) return;      // Only process it once...
+  GlobalValue *Global = dyn_cast<GlobalValue>(V);
+  if (Global == 0) return;                      // Not a global value
+  GlobalDSNode *GlobalNode = new GlobalDSNode(Global);
+  Rep->Nodes.push_back(GlobalNode);
+  Rep->ValueMap[V].add(GlobalNode);
+  Rep->addAllUsesToWorkList(Global);
+}
+
+
+// visitCallInst - Record a call node for the call instruction.  When the call
+// yields a pointer, also create the shadow node that the result points to.
+// Finally scan the call's arguments for references to global values...
+//
+void InitVisitor::visitCallInst(CallInst *CI) {
+  CallDSNode *CallNode = new CallDSNode(CI);
+  Rep->Nodes.push_back(CallNode);
+  Rep->CallMap[CI] = CallNode;
+
+  const bool ReturnsPointer = isa<PointerType>(CI->getType());
+  if (ReturnsPointer) {
+    // Create the shadow node that represents the memory object the return
+    // value points to...
+    ShadowDSNode *RetShadow = new ShadowDSNode(CallNode, Func->getParent());
+    Rep->ShadowNodes.push_back(RetShadow);
+
+    // Link 0 of the call node stands for the return value: aim it at the
+    // shadow node just created...
+    CallNode->getLink(0).add(RetShadow);
+
+    // The call instruction itself evaluates to a pointer to the shadow node...
+    Rep->ValueMap[CI].add(RetShadow, CI);
+
+    // Everything that uses the returned pointer needs to be processed...
+    Rep->addAllUsesToWorkList(CI);
+  }
+
+  // The first operand is skipped; every remaining operand is checked for a
+  // reference to a global value...
+  //
+  for (CallInst::op_iterator Op = CI->op_begin()+1, End = CI->op_end();
+       Op != End; ++Op)
+    visitOperand(*Op);
+}
+
+
+// visitAllocationInst - Give the allocation its own node in the graph.  No
+// operand scan is done here: allocation instructions take no pointer
+// arguments, so they cannot reference global variables...
+//
+void InitVisitor::visitAllocationInst(AllocationInst *AI) {
+  // The allocation instruction is recorded as pointing to its new node.
+  NewDSNode *AllocNode = new NewDSNode(AI);
+  Rep->Nodes.push_back(AllocNode);
+  Rep->ValueMap[AI].add(AllocNode, AI);
+
+  // Queue every user of the allocated pointer for processing...
+  Rep->addAllUsesToWorkList(AI);
+}
+
+
+// visitInstruction - Handler for all other instruction types: only scan the
+// operands, looking for uses of global variables...
+void InitVisitor::visitInstruction(Instruction *I) {
+  for (Instruction::op_iterator Op = I->op_begin(), End = I->op_end();
+       Op != End; ++Op)
+    visitOperand(*Op);
+}
+
+
+// addAllUsesToWorkList - Queue every instruction of the current function that
+// uses V for further processing.  Users that are not instructions are skipped.
+//
+void FunctionRepBuilder::addAllUsesToWorkList(Value *V) {
+  for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
+    // The use list of a global value is not guaranteed to hold instructions
+    // only, and cast<> asserts on a mismatch: filter with dyn_cast<> instead.
+    Instruction *Inst = dyn_cast<Instruction>(*I);
+    if (Inst == 0) continue;
+    // A global may also be used from other functions.  Only add instructions
+    // that _are_ in this function.
+    if (Inst->getParent()->getParent() != F->getFunction()) continue;
+    // Only let an instruction occur on the work list once...
+    if (std::find(WorkList.begin(), WorkList.end(), Inst) == WorkList.end())
+      WorkList.push_back(Inst);
+  }
+}
+
+
+
+
+void FunctionRepBuilder::initializeWorkList(Function *Func) {
+  // Seed the graph with the pointer arguments of the function: each one gets
+  // an argument node plus a shadow node for the memory it points to...
+  //
+  for (Function::ArgumentListType::iterator
+         ArgI = Func->getArgumentList().begin(),
+         ArgE = Func->getArgumentList().end(); ArgI != ArgE; ++ArgI) {
+    // Only arguments of pointer type are processed...
+    if (!isa<PointerType>((*ArgI)->getType())) continue;
+    ArgDSNode *ArgNode = new ArgDSNode(*ArgI);
+    Nodes.push_back(ArgNode);
+
+    // The shadow node represents what the argument points to; the value map
+    // entry of the argument refers to it...
+    ShadowDSNode *ArgShadow = new ShadowDSNode(ArgNode, Func->getParent());
+    ShadowNodes.push_back(ArgShadow);
+    ValueMap[*ArgI].add(PointerVal(ArgShadow), *ArgI);
+
+    // The value of the argument is the shadow value...
+    ArgNode->getLink(0).add(ArgShadow);
+
+    // Make sure that all users of the argument are processed...
+    addAllUsesToWorkList(*ArgI);
+  }
+
+  // Let an InitVisitor walk the function.  Its handlers create the nodes for
+  // calls, allocations and referenced globals and queue their users...
+  //
+  InitVisitor IV(this, Func);
+  IV.visit(Func);
+}
+
+
+
+
+PointerVal FunctionRepBuilder::getIndexedPointerDest(const PointerVal &InP,
+                                                     const MemAccessInst *MAI) {
+  // Walk the indices of MAI starting from field InP.Index, adding the number
+  // of pointer fields stepped over, and return the (node, field) reached.
+  unsigned FieldNo = InP.Index;
+  const Type *CurTy = MAI->getPointerOperand()->getType();
+  for (MemAccessInst::const_op_iterator Idx = MAI->idx_begin(),
+         IdxEnd = MAI->idx_end(); Idx != IdxEnd; ++Idx) {
+    if ((*Idx)->getType() != Type::UByteTy) {
+      // Stepping into array or initial pointer: only the type advances.
+      CurTy = cast<SequentialType>(CurTy)->getElementType();
+      continue;
+    }
+    // Struct index: count the pointer fields of every preceding member...
+    StructType *STy = cast<StructType>(CurTy);
+    unsigned StructIdx = cast<ConstantUInt>(*Idx)->getValue();
+    for (unsigned Member = 0; Member != StructIdx; ++Member)
+      FieldNo += countPointerFields(STy->getContainedType(Member));
+    CurTy = STy->getContainedType(StructIdx);  // ...then step into the member
+  }
+  return PointerVal(InP.Node, FieldNo);
+}
+
+static PointerValSet &getField(const PointerVal &DestPtr) {
+  // getField - Return the link set that DestPtr's node holds at DestPtr.Index.
+  assert(DestPtr.Node != 0);
+  return DestPtr.Node->getLink(DestPtr.Index);
+}
+
+
+// visitGetElementPtrInst - A GEP is (re)processed whenever the set of values
+// of its pointer operand changes.  Each source pointer is pushed through the
+// GEP's indices and the result is merged into the GEP's own value set.
+//
+void FunctionRepBuilder::visitGetElementPtrInst(GetElementPtrInst *GEP) {
+  // Value set of the GEP itself: this is what gets expanded...
+  PointerValSet &Result = ValueMap[GEP];
+
+  // Value set of the pointer operand: the inputs to propagate...
+  const PointerValSet &Inputs = ValueMap[GEP->getOperand(0)];
+
+  // Propagate every input value, remembering whether the result set actually
+  // changed...
+  //
+  bool Grew = false;
+  for (unsigned Idx = 0, End = Inputs.size(); Idx != End; ++Idx) {
+    PointerVal Dest = getIndexedPointerDest(Inputs[Idx], GEP);
+    if (Result.add(Dest)) Grew = true;
+  }
+
+  // Users only need another look when the value set of the GEP changed...
+  //
+  if (Grew)
+    addAllUsesToWorkList(GEP);
+}
+
+void FunctionRepBuilder::visitReturnInst(ReturnInst *RI) {
+  RetNode.add(ValueMap[RI->getOperand(0)]);  // Add operand 0's set to RetNode
+}
+
+void FunctionRepBuilder::visitLoadInst(LoadInst *LI) {
+  // Only loads that produce a pointer are tracked...
+  const PointerType *LoadedTy = dyn_cast<PointerType>(LI->getType());
+  if (LoadedTy == 0) return;
+
+  const PointerValSet &Sources = ValueMap[LI->getOperand(0)];
+  PointerValSet &Loaded = ValueMap[LI];
+
+  bool Grew = false;
+  for (unsigned Idx = 0, End = Sources.size(); Idx != End; ++Idx) {
+    // Find the field the load reads through this source pointer...
+    PointerVal Ptr = getIndexedPointerDest(Sources[Idx], LI);
+    PointerValSet &Field = getField(Ptr);
+
+    if (Field.size() == 0) {
+      // A null field is only filled in when it belongs to a shadow node; for
+      // any other kind of node this source contributes nothing...
+      if (Ptr.Node->NodeType != DSNode::ShadowNode) continue;
+
+      // Synthesize a shadow node for the loaded pointer's element type and
+      // link it into the field...
+      ShadowDSNode *Shad = (ShadowDSNode*)Ptr.Node;
+      Field.add(Shad->synthesizeNode(LoadedTy->getElementType(), this));
+    }
+    // Merge the contents of the field into the value set of the load...
+    if (Loaded.add(Field)) Grew = true;
+  }
+
+  if (Grew) addAllUsesToWorkList(LI);
+}
+
+void FunctionRepBuilder::visitStoreInst(StoreInst *SI) {
+  // A store matters only when the value being stored (operand 0) is itself a
+  // pointer, i.e. when it writes a pointer into a data structure...
+  //
+  if (!isa<PointerType>(SI->getOperand(0)->getType())) return;
+
+  const PointerValSet &Stored  = ValueMap[SI->getOperand(0)];  // Values stored
+  const PointerValSet &Targets = ValueMap[SI->getOperand(1)];  // Destinations
+
+  for (unsigned Src = 0, SrcEnd = Stored.size(); Src != SrcEnd; ++Src) {
+    const PointerVal &SrcPtr = Stored[Src];
+    for (unsigned Dst = 0, DstEnd = Targets.size(); Dst != DstEnd; ++Dst) {
+      PointerVal Dest = getIndexedPointerDest(Targets[Dst], SI);
+
+#if 0
+      cerr << "Setting Dest:\n";
+      Dest.print(cerr);
+      cerr << "to point to Src:\n";
+      SrcPtr.print(cerr);
+#endif
+
+      // Add SrcPtr to the Dest field; an unchanged field needs no follow-up.
+      if (!getField(Dest).add(SrcPtr)) continue;
+
+      // The field was modified: requeue the users of every value that the
+      // Dest node reports through getPointers().
+      const std::vector<Value*> &Ptrs = Dest.Node->getPointers();
+      for (unsigned P = 0, NumPtrs = Ptrs.size(); P != NumPtrs; ++P)
+        addAllUsesToWorkList(Ptrs[P]);
+    }
+  }
+}
+
+void FunctionRepBuilder::visitCallInst(CallInst *CI) {
+  CallDSNode *CallNode = CallMap[CI];
+
+  // Pass the value set of every pointer operand on to the call node.  Operand
+  // 0 is skipped for direct calls, where it is the called Function itself...
+  unsigned NextPtrArg = 0;
+  unsigned Op = isa<Function>(CI->getOperand(0)) ? 1 : 0;
+  for (unsigned End = CI->getNumOperands(); Op != End; ++Op)
+    if (isa<PointerType>(CI->getOperand(Op)->getType()))
+      CallNode->addArgValue(NextPtrArg++, ValueMap[CI->getOperand(Op)]);
+}
+
+void FunctionRepBuilder::visitPHINode(PHINode *PN) {
+  assert(isa<PointerType>(PN->getType()) && "Should only update ptr phis");
+  // Merge the value set of every incoming value into the set of the PHI...
+  PointerValSet &Merged = ValueMap[PN];
+  bool Grew = false;
+  for (unsigned In = 0, NumIn = PN->getNumIncomingValues(); In != NumIn; ++In) {
+    Value *Incoming = PN->getIncomingValue(In);
+    if (Merged.add(ValueMap[Incoming], Incoming)) Grew = true;
+  }
+  if (Grew) addAllUsesToWorkList(PN);     // Requeue the users on a change
+}
+
+
+
+
+// FunctionDSGraph constructor - Construct a FunctionRepBuilder for this graph
+// and store the nodes, shadow nodes, return node and value map it reports.
+//
+FunctionDSGraph::FunctionDSGraph(Function *F) : Func(F) {
+  FunctionRepBuilder RepBuilder(this);   // The builder is handed this graph
+  Nodes       = RepBuilder.getNodes();
+  ShadowNodes = RepBuilder.getShadowNodes();
+  RetNode     = RepBuilder.getRetNode();
+  ValueMap    = RepBuilder.getValueMap();
+}
+