[CFLAA] Move the graph builder out from CFLSteens. NFC.

Patch by Jia Chen.

Differential Revision: http://reviews.llvm.org/D22022

llvm-svn: 274958
diff --git a/llvm/lib/Analysis/CFLGraph.h b/llvm/lib/Analysis/CFLGraph.h
index cbc37f3..cfa5f54 100644
--- a/llvm/lib/Analysis/CFLGraph.h
+++ b/llvm/lib/Analysis/CFLGraph.h
@@ -17,6 +17,9 @@
 
 #include "AliasAnalysisSummary.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
 
 namespace llvm {
 namespace cflaa {
@@ -144,6 +147,412 @@
   bool empty() const { return NodeImpls.empty(); }
   std::size_t size() const { return NodeImpls.size(); }
 };
+
+///\brief A builder class used to create CFLGraph instance from a given function
+/// The CFL-AA that uses this builder must provide its own type as a template
+/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
+/// needs a way of obtaining the summary of other functions when callinsts are
+/// encountered.
+/// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
+/// member function that takes a Function& and returns the corresponding summary
+/// as a const AliasSummary*.
+template <typename CFLAA> class CFLGraphBuilder {
+  // Input of the builder
+  CFLAA &Analysis;
+  const TargetLibraryInfo &TLI;
+
+  // Output of the builder
+  CFLGraph Graph;
+  SmallVector<Value *, 4> ReturnedValues;
+
+  // Auxiliary structures used by the builder
+  SmallVector<InstantiatedRelation, 8> InstantiatedRelations;
+  SmallVector<InstantiatedAttr, 8> InstantiatedAttrs;
+
+  // Helper class
+  /// Gets the edges our graph should have, based on an Instruction*
+  class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
+    CFLAA &AA;
+    const TargetLibraryInfo &TLI;
+
+    CFLGraph &Graph;
+    SmallVectorImpl<Value *> &ReturnValues;
+    SmallVectorImpl<InstantiatedRelation> &InstantiatedRelations;
+    SmallVectorImpl<InstantiatedAttr> &InstantiatedAttrs;
+
+    static bool hasUsefulEdges(ConstantExpr *CE) {
+      // ConstantExpr doesn't have terminators, invokes, or fences, so only
+      // needs
+      // to check for compares.
+      return CE->getOpcode() != Instruction::ICmp &&
+             CE->getOpcode() != Instruction::FCmp;
+    }
+
+    // Returns possible functions called by CS into the given SmallVectorImpl.
+    // Returns true if targets found, false otherwise.
+    static bool getPossibleTargets(CallSite CS,
+                                   SmallVectorImpl<Function *> &Output) {
+      if (auto *Fn = CS.getCalledFunction()) {
+        Output.push_back(Fn);
+        return true;
+      }
+
+      // TODO: If the call is indirect, we might be able to enumerate all
+      // potential
+      // targets of the call and return them, rather than just failing.
+      return false;
+    }
+
+    void addNode(Value *Val) {
+      assert(Val != nullptr);
+      if (!Graph.addNode(Val))
+        return;
+
+      if (isa<GlobalValue>(Val)) {
+        Graph.addAttr(Val, getGlobalOrArgAttrFromValue(*Val));
+        // Currently we do not attempt to be smart on globals
+        InstantiatedAttrs.push_back(
+            InstantiatedAttr{InstantiatedValue{Val, 1}, getAttrUnknown()});
+      } else if (auto CExpr = dyn_cast<ConstantExpr>(Val))
+        if (hasUsefulEdges(CExpr))
+          visitConstantExpr(CExpr);
+    }
+
+    void addNodeWithAttr(Value *Val, AliasAttrs Attr) {
+      addNode(Val);
+      Graph.addAttr(Val, Attr);
+    }
+
+    void addEdge(Value *From, Value *To, EdgeType Type) {
+      assert(From != nullptr && To != nullptr);
+      if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
+        return;
+      addNode(From);
+      if (To != From)
+        addNode(To);
+      Graph.addEdge(From, To, Type);
+    }
+
+  public:
+    GetEdgesVisitor(CFLGraphBuilder &Builder)
+        : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
+          ReturnValues(Builder.ReturnedValues),
+          InstantiatedRelations(Builder.InstantiatedRelations),
+          InstantiatedAttrs(Builder.InstantiatedAttrs) {}
+
+    void visitInstruction(Instruction &) {
+      llvm_unreachable("Unsupported instruction encountered");
+    }
+
+    void visitReturnInst(ReturnInst &Inst) {
+      if (auto RetVal = Inst.getReturnValue()) {
+        if (RetVal->getType()->isPointerTy()) {
+          addNode(RetVal);
+          ReturnValues.push_back(RetVal);
+        }
+      }
+    }
+
+    void visitPtrToIntInst(PtrToIntInst &Inst) {
+      auto *Ptr = Inst.getOperand(0);
+      addNodeWithAttr(Ptr, getAttrEscaped());
+    }
+
+    void visitIntToPtrInst(IntToPtrInst &Inst) {
+      auto *Ptr = &Inst;
+      addNodeWithAttr(Ptr, getAttrUnknown());
+    }
+
+    void visitCastInst(CastInst &Inst) {
+      auto *Src = Inst.getOperand(0);
+      addEdge(Src, &Inst, EdgeType::Assign);
+    }
+
+    void visitBinaryOperator(BinaryOperator &Inst) {
+      auto *Op1 = Inst.getOperand(0);
+      auto *Op2 = Inst.getOperand(1);
+      addEdge(Op1, &Inst, EdgeType::Assign);
+      addEdge(Op2, &Inst, EdgeType::Assign);
+    }
+
+    void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
+      auto *Ptr = Inst.getPointerOperand();
+      auto *Val = Inst.getNewValOperand();
+      addEdge(Ptr, Val, EdgeType::Dereference);
+    }
+
+    void visitAtomicRMWInst(AtomicRMWInst &Inst) {
+      auto *Ptr = Inst.getPointerOperand();
+      auto *Val = Inst.getValOperand();
+      addEdge(Ptr, Val, EdgeType::Dereference);
+    }
+
+    void visitPHINode(PHINode &Inst) {
+      for (Value *Val : Inst.incoming_values())
+        addEdge(Val, &Inst, EdgeType::Assign);
+    }
+
+    void visitGetElementPtrInst(GetElementPtrInst &Inst) {
+      auto *Op = Inst.getPointerOperand();
+      addEdge(Op, &Inst, EdgeType::Assign);
+    }
+
+    void visitSelectInst(SelectInst &Inst) {
+      // Condition is not processed here (The actual statement producing
+      // the condition result is processed elsewhere). For select, the
+      // condition is evaluated, but not loaded, stored, or assigned
+      // simply as a result of being the condition of a select.
+
+      auto *TrueVal = Inst.getTrueValue();
+      auto *FalseVal = Inst.getFalseValue();
+      addEdge(TrueVal, &Inst, EdgeType::Assign);
+      addEdge(FalseVal, &Inst, EdgeType::Assign);
+    }
+
+    void visitAllocaInst(AllocaInst &Inst) { Graph.addNode(&Inst); }
+
+    void visitLoadInst(LoadInst &Inst) {
+      auto *Ptr = Inst.getPointerOperand();
+      auto *Val = &Inst;
+      addEdge(Val, Ptr, EdgeType::Reference);
+    }
+
+    void visitStoreInst(StoreInst &Inst) {
+      auto *Ptr = Inst.getPointerOperand();
+      auto *Val = Inst.getValueOperand();
+      addEdge(Ptr, Val, EdgeType::Dereference);
+    }
+
+    void visitVAArgInst(VAArgInst &Inst) {
+      // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
+      // does
+      // two things:
+      //  1. Loads a value from *((T*)*Ptr).
+      //  2. Increments (stores to) *Ptr by some target-specific amount.
+      // For now, we'll handle this like a landingpad instruction (by placing
+      // the
+      // result in its own group, and having that group alias externals).
+      addNodeWithAttr(&Inst, getAttrUnknown());
+    }
+
+    static bool isFunctionExternal(Function *Fn) {
+      return !Fn->hasExactDefinition();
+    }
+
+    bool tryInterproceduralAnalysis(CallSite CS,
+                                    const SmallVectorImpl<Function *> &Fns) {
+      assert(Fns.size() > 0);
+
+      if (CS.arg_size() > MaxSupportedArgsInSummary)
+        return false;
+
+      // Exit early if we'll fail anyway
+      for (auto *Fn : Fns) {
+        if (isFunctionExternal(Fn) || Fn->isVarArg())
+          return false;
+        // Fail if the caller does not provide enough arguments
+        assert(Fn->arg_size() <= CS.arg_size());
+        if (!AA.getAliasSummary(*Fn))
+          return false;
+      }
+
+      for (auto *Fn : Fns) {
+        auto Summary = AA.getAliasSummary(*Fn);
+        assert(Summary != nullptr);
+
+        auto &RetParamRelations = Summary->RetParamRelations;
+        for (auto &Relation : RetParamRelations) {
+          auto IRelation = instantiateExternalRelation(Relation, CS);
+          if (IRelation.hasValue())
+            InstantiatedRelations.push_back(*IRelation);
+        }
+
+        auto &RetParamAttributes = Summary->RetParamAttributes;
+        for (auto &Attribute : RetParamAttributes) {
+          auto IAttr = instantiateExternalAttribute(Attribute, CS);
+          if (IAttr.hasValue())
+            InstantiatedAttrs.push_back(*IAttr);
+        }
+      }
+
+      return true;
+    }
+
+    void visitCallSite(CallSite CS) {
+      auto Inst = CS.getInstruction();
+
+      // Make sure all arguments and return value are added to the graph first
+      for (Value *V : CS.args())
+        addNode(V);
+      if (Inst->getType()->isPointerTy())
+        addNode(Inst);
+
+      // Check if Inst is a call to a library function that
+      // allocates/deallocates
+      // on the heap. Those kinds of functions do not introduce any aliases.
+      // TODO: address other common library functions such as realloc(),
+      // strdup(),
+      // etc.
+      if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
+          isFreeCall(Inst, &TLI))
+        return;
+
+      // TODO: Add support for noalias args/all the other fun function
+      // attributes
+      // that we can tack on.
+      SmallVector<Function *, 4> Targets;
+      if (getPossibleTargets(CS, Targets))
+        if (tryInterproceduralAnalysis(CS, Targets))
+          return;
+
+      // Because the function is opaque, we need to note that anything
+      // could have happened to the arguments (unless the function is marked
+      // readonly or readnone), and that the result could alias just about
+      // anything, too (unless the result is marked noalias).
+      if (!CS.onlyReadsMemory())
+        for (Value *V : CS.args()) {
+          if (V->getType()->isPointerTy()) {
+            // The argument itself escapes.
+            addNodeWithAttr(V, getAttrEscaped());
+            // The fate of argument memory is unknown. Note that since
+            // AliasAttrs
+            // is transitive with respect to dereference, we only need to
+            // specify
+            // it for the first-level memory.
+            InstantiatedAttrs.push_back(
+                InstantiatedAttr{InstantiatedValue{V, 1}, getAttrUnknown()});
+          }
+        }
+
+      if (Inst->getType()->isPointerTy()) {
+        auto *Fn = CS.getCalledFunction();
+        if (Fn == nullptr || !Fn->doesNotAlias(0))
+          // No need to call addNodeWithAttr() since we've added Inst at the
+          // beginning of this function and we know it is not a global.
+          Graph.addAttr(Inst, getAttrUnknown());
+      }
+    }
+
+    /// Because vectors/aggregates are immutable and unaddressable, there's
+    /// nothing we can do to coax a value out of them, other than calling
+    /// Extract{Element,Value}. We can effectively treat them as pointers to
+    /// arbitrary memory locations we can store in and load from.
+    void visitExtractElementInst(ExtractElementInst &Inst) {
+      auto *Ptr = Inst.getVectorOperand();
+      auto *Val = &Inst;
+      addEdge(Val, Ptr, EdgeType::Reference);
+    }
+
+    void visitInsertElementInst(InsertElementInst &Inst) {
+      auto *Vec = Inst.getOperand(0);
+      auto *Val = Inst.getOperand(1);
+      addEdge(Vec, &Inst, EdgeType::Assign);
+      addEdge(&Inst, Val, EdgeType::Dereference);
+    }
+
+    void visitLandingPadInst(LandingPadInst &Inst) {
+      // Exceptions come from "nowhere", from our analysis' perspective.
+      // So we place the instruction its own group, noting that said group may
+      // alias externals
+      addNodeWithAttr(&Inst, getAttrUnknown());
+    }
+
+    void visitInsertValueInst(InsertValueInst &Inst) {
+      auto *Agg = Inst.getOperand(0);
+      auto *Val = Inst.getOperand(1);
+      addEdge(Agg, &Inst, EdgeType::Assign);
+      addEdge(&Inst, Val, EdgeType::Dereference);
+    }
+
+    void visitExtractValueInst(ExtractValueInst &Inst) {
+      auto *Ptr = Inst.getAggregateOperand();
+      addEdge(&Inst, Ptr, EdgeType::Reference);
+    }
+
+    void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
+      auto *From1 = Inst.getOperand(0);
+      auto *From2 = Inst.getOperand(1);
+      addEdge(From1, &Inst, EdgeType::Assign);
+      addEdge(From2, &Inst, EdgeType::Assign);
+    }
+
+    void visitConstantExpr(ConstantExpr *CE) {
+      switch (CE->getOpcode()) {
+      default:
+        llvm_unreachable("Unknown instruction type encountered!");
+// Build the switch statement using the Instruction.def file.
+#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
+  case Instruction::OPCODE:                                                    \
+    this->visit##OPCODE(*(CLASS *)CE);                                         \
+    break;
+#include "llvm/IR/Instruction.def"
+      }
+    }
+  };
+
+  // Helper functions
+
+  // Determines whether or not we an instruction is useless to us (e.g.
+  // FenceInst)
+  static bool hasUsefulEdges(Instruction *Inst) {
+    bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
+                                    !isa<InvokeInst>(Inst) &&
+                                    !isa<ReturnInst>(Inst);
+    return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
+           !IsNonInvokeRetTerminator;
+  }
+
+  void addArgumentToGraph(Argument &Arg) {
+    if (Arg.getType()->isPointerTy()) {
+      Graph.addNode(&Arg);
+      Graph.addAttr(&Arg, getGlobalOrArgAttrFromValue(Arg));
+      // Pointees of a formal parameter is known to the caller
+      InstantiatedAttrs.push_back(
+          InstantiatedAttr{InstantiatedValue{&Arg, 1}, getAttrCaller()});
+    }
+  }
+
+  // Given an Instruction, this will add it to the graph, along with any
+  // Instructions that are potentially only available from said Instruction
+  // For example, given the following line:
+  //   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
+  // addInstructionToGraph would add both the `load` and `getelementptr`
+  // instructions to the graph appropriately.
+  void addInstructionToGraph(Instruction &Inst) {
+    if (!hasUsefulEdges(&Inst))
+      return;
+
+    GetEdgesVisitor(*this).visit(Inst);
+  }
+
+  // Builds the graph needed for constructing the StratifiedSets for the given
+  // function
+  void buildGraphFrom(Function &Fn) {
+    for (auto &Bb : Fn.getBasicBlockList())
+      for (auto &Inst : Bb.getInstList())
+        addInstructionToGraph(Inst);
+
+    for (auto &Arg : Fn.args())
+      addArgumentToGraph(Arg);
+  }
+
+public:
+  CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
+      : Analysis(Analysis), TLI(TLI) {
+    buildGraphFrom(Fn);
+  }
+
+  const CFLGraph &getCFLGraph() const { return Graph; }
+  const SmallVector<Value *, 4> &getReturnValues() const {
+    return ReturnedValues;
+  }
+  const SmallVector<InstantiatedRelation, 8> &getInstantiatedRelations() const {
+    return InstantiatedRelations;
+  }
+  const SmallVector<InstantiatedAttr, 8> &getInstantiatedAttrs() const {
+    return InstantiatedAttrs;
+  }
+};
 }
 }