llvm/lib/Analysis/CFLAliasAnalysis.cpp - toolchain/llvm-project - Gitiles

 //===- CFLAliasAnalysis.cpp - CFL-Based Alias Analysis Implementation ------==//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a CFL-based context-insensitive alias analysis
 // algorithm. It does not depend on types. The algorithm is a mixture of the one
 // described in "Demand-driven alias analysis for C" by Xin Zheng and Radu
 // Rugina, and "Fast algorithms for Dyck-CFL-reachability with applications to
 // Alias Analysis" by Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the
 // papers, we build a graph of the uses of a variable, where each node is a
 // memory location, and each edge is an action that happened on that memory
 // location.  The "actions" can be one of Dereference, Reference, or Assign.
 //
 // Two variables are considered as aliasing iff you can reach one value's node
 // from the other value's node and the language formed by concatenating all of
 // the edge labels (actions) conforms to a context-free grammar.
 //
 // Because this algorithm requires a graph search on each query, we execute the
 // algorithm outlined in "Fast algorithms..." (mentioned above)
 // in order to transform the graph into sets of variables that may alias in
 // ~nlogn time (n = number of variables), which makes queries take constant
 // time.
 //===----------------------------------------------------------------------===//

 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
 // CFLAA is interprocedural. This is *technically* A Bad Thing, because
 // FunctionPasses are only allowed to inspect the Function that they're being
 // run on. Realistically, this likely isn't a problem until we allow
 // FunctionPasses to run concurrently.

 #include "llvm/Analysis/CFLAliasAnalysis.h"
 #include "StratifiedSets.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <memory>
 #include <tuple>

 using namespace llvm;

 #define DEBUG_TYPE "cfl-aa"

 CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI)
     : AAResultBase(), TLI(TLI) {}
 CFLAAResult::CFLAAResult(CFLAAResult &&Arg)
     : AAResultBase(std::move(Arg)), TLI(Arg.TLI) {}
 CFLAAResult::~CFLAAResult() {}

 /// We use InterfaceValue to describe parameters/return value, as well as
 /// potential memory locations that are pointed to by parameters/return value,
 /// of a function.
 /// Index is an integer which represents a single parameter or a return value.
 /// When the index is 0, it refers to the return value. Non-zero index i refers
 /// to the i-th parameter.
 /// DerefLevel indicates the number of dereferences one must perform on the
 /// parameter/return value to get this InterfaceValue.
 struct InterfaceValue {
   unsigned Index;
   unsigned DerefLevel;
 };

 bool operator==(InterfaceValue lhs, InterfaceValue rhs) {
   return lhs.Index == rhs.Index && lhs.DerefLevel == rhs.DerefLevel;
 }
 bool operator!=(InterfaceValue lhs, InterfaceValue rhs) {
   return !(lhs == rhs);
 }

 /// We use ExternalRelation to describe an externally visible aliasing relations
 /// between parameters/return value of a function.
 struct ExternalRelation {
   InterfaceValue From, To;
 };

 /// We use ExternalAttribute to describe an externally visible StratifiedAttrs
 /// for parameters/return value.
 struct ExternalAttribute {
   InterfaceValue IValue;
   StratifiedAttrs Attr;
 };

 /// Information we have about a function and would like to keep around.
 class CFLAAResult::FunctionInfo {
   StratifiedSets<Value *> Sets;

   // RetParamRelations is a collection of ExternalRelations.
   SmallVector<ExternalRelation, 8> RetParamRelations;

   // RetParamAttributes is a collection of ExternalAttributes.
   SmallVector<ExternalAttribute, 8> RetParamAttributes;

 public:
   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
                StratifiedSets<Value *> S);

   const StratifiedSets<Value *> &getStratifiedSets() const { return Sets; }
   const SmallVectorImpl<ExternalRelation> &getRetParamRelations() const {
     return RetParamRelations;
   }
   const SmallVectorImpl<ExternalAttribute> &getRetParamAttributes() const {
     return RetParamAttributes;
   }
 };

 /// Try to go from a Value* to a Function*. Never returns nullptr.
 static Optional<Function *> parentFunctionOfValue(Value *);

 /// Returns possible functions called by the Inst* into the given
 /// SmallVectorImpl. Returns true if targets found, false otherwise. This is
 /// templated so we can use it with CallInsts and InvokeInsts.
 static bool getPossibleTargets(CallSite, SmallVectorImpl<Function *> &);

 const StratifiedIndex StratifiedLink::SetSentinel =
     std::numeric_limits<StratifiedIndex>::max();

 namespace {
 /// StratifiedInfo Attribute things.
 LLVM_CONSTEXPR unsigned MaxStratifiedAttrIndex = NumStratifiedAttrs;
 LLVM_CONSTEXPR unsigned AttrEscapedIndex = 0;
 LLVM_CONSTEXPR unsigned AttrUnknownIndex = 1;
 LLVM_CONSTEXPR unsigned AttrGlobalIndex = 2;
 LLVM_CONSTEXPR unsigned AttrCallerIndex = 3;
 LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 4;
 LLVM_CONSTEXPR unsigned AttrLastArgIndex = MaxStratifiedAttrIndex;
 LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;

 // NOTE: These aren't StratifiedAttrs because bitsets don't have a constexpr
 // ctor for some versions of MSVC that we support. We could maybe refactor,
 // but...
 using StratifiedAttr = unsigned;
 LLVM_CONSTEXPR StratifiedAttr AttrNone = 0;
 LLVM_CONSTEXPR StratifiedAttr AttrEscaped = 1 << AttrEscapedIndex;
 LLVM_CONSTEXPR StratifiedAttr AttrUnknown = 1 << AttrUnknownIndex;
 LLVM_CONSTEXPR StratifiedAttr AttrGlobal = 1 << AttrGlobalIndex;
 LLVM_CONSTEXPR StratifiedAttr AttrCaller = 1 << AttrCallerIndex;
 LLVM_CONSTEXPR StratifiedAttr ExternalAttrMask =
     AttrEscaped | AttrUnknown | AttrGlobal;

 /// The maximum number of arguments we can put into a summary.
 LLVM_CONSTEXPR unsigned MaxSupportedArgsInSummary = 50;

 /// StratifiedSets call for knowledge of "direction", so this is how we
 /// represent that locally.
 enum class Level { Same, Above, Below };

 /// Edges can be one of four "weights" -- each weight must have an inverse
 /// weight (Assign has Assign; Reference has Dereference).
 enum class EdgeType {
   /// The weight assigned when assigning from or to a value. For example, in:
   /// %b = getelementptr %a, 0
   /// ...The relationships are %b assign %a, and %a assign %b. This used to be
   /// two edges, but having a distinction bought us nothing.
   Assign,

   /// The edge used when we have an edge going from some handle to a Value.
   /// Examples of this include:
   /// %b = load %a              (%b Dereference %a)
   /// %b = extractelement %a, 0 (%a Dereference %b)
   Dereference,

   /// The edge used when our edge goes from a value to a handle that may have
   /// contained it at some point. Examples:
   /// %b = load %a              (%a Reference %b)
   /// %b = extractelement %a, 0 (%b Reference %a)
   Reference
 };

 /// The Program Expression Graph (PEG) of CFL analysis
 class CFLGraph {
   typedef Value *Node;

   struct Edge {
     EdgeType Type;
     Node Other;
   };

   typedef std::vector<Edge> EdgeList;

   struct NodeInfo {
     EdgeList Edges;
     StratifiedAttrs Attr;
   };

   typedef DenseMap<Node, NodeInfo> NodeMap;
   NodeMap NodeImpls;

   // Gets the inverse of a given EdgeType.
   static EdgeType flipWeight(EdgeType Initial) {
     switch (Initial) {
     case EdgeType::Assign:
       return EdgeType::Assign;
     case EdgeType::Dereference:
       return EdgeType::Reference;
     case EdgeType::Reference:
       return EdgeType::Dereference;
     }
     llvm_unreachable("Incomplete coverage of EdgeType enum");
   }

   const NodeInfo *getNode(Node N) const {
     auto Itr = NodeImpls.find(N);
     if (Itr == NodeImpls.end())
       return nullptr;
     return &Itr->second;
   }
   NodeInfo *getNode(Node N) {
     auto Itr = NodeImpls.find(N);
     if (Itr == NodeImpls.end())
       return nullptr;
     return &Itr->second;
   }

   static Node nodeDeref(const NodeMap::value_type &P) { return P.first; }
   typedef std::pointer_to_unary_function<const NodeMap::value_type &, Node>
       NodeDerefFun;

 public:
   typedef EdgeList::const_iterator const_edge_iterator;
   typedef mapped_iterator<NodeMap::const_iterator, NodeDerefFun>
       const_node_iterator;

   bool addNode(Node N) {
     return NodeImpls.insert(std::make_pair(N, NodeInfo{EdgeList(), AttrNone}))
         .second;
   }

   void addAttr(Node N, StratifiedAttrs Attr) {
     auto *Info = getNode(N);
     assert(Info != nullptr);
     Info->Attr |= Attr;
   }

   void addEdge(Node From, Node To, EdgeType Type) {
     auto *FromInfo = getNode(From);
     assert(FromInfo != nullptr);
     auto *ToInfo = getNode(To);
     assert(ToInfo != nullptr);

     FromInfo->Edges.push_back(Edge{Type, To});
     ToInfo->Edges.push_back(Edge{flipWeight(Type), From});
   }

   StratifiedAttrs attrFor(Node N) const {
     auto *Info = getNode(N);
     assert(Info != nullptr);
     return Info->Attr;
   }

   iterator_range<const_edge_iterator> edgesFor(Node N) const {
     auto *Info = getNode(N);
     assert(Info != nullptr);
     auto &Edges = Info->Edges;
     return make_range(Edges.begin(), Edges.end());
   }

   iterator_range<const_node_iterator> nodes() const {
     return make_range<const_node_iterator>(
         map_iterator(NodeImpls.begin(), NodeDerefFun(nodeDeref)),
         map_iterator(NodeImpls.end(), NodeDerefFun(nodeDeref)));
   }

   bool empty() const { return NodeImpls.empty(); }
   std::size_t size() const { return NodeImpls.size(); }
 };

 // This is the result of instantiating InterfaceValue at a particular callsite
 struct InterprocNode {
   Value *Val;
   unsigned DerefLevel;
 };

 // Interprocedural assignment edges that CFLGraph may not easily model
 struct InterprocEdge {
   InterprocNode From, To;
 };

 // Interprocedural attribute tagging that CFLGraph may not easily model
 struct InterprocAttr {
   InterprocNode Node;
   StratifiedAttrs Attr;
 };

 /// Gets the edges our graph should have, based on an Instruction*
 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
   CFLAAResult &AA;
   const TargetLibraryInfo &TLI;

   CFLGraph &Graph;
   SmallVectorImpl<Value *> &ReturnValues;
   SmallPtrSetImpl<Value *> &Externals;
   SmallPtrSetImpl<Value *> &Escapes;
   SmallVectorImpl<InterprocEdge> &InterprocEdges;
   SmallVectorImpl<InterprocAttr> &InterprocAttrs;

   static bool hasUsefulEdges(ConstantExpr *CE) {
     // ConstantExpr doesn't have terminators, invokes, or fences, so only needs
     // to check for compares.
     return CE->getOpcode() != Instruction::ICmp &&
            CE->getOpcode() != Instruction::FCmp;
   }

   void addNode(Value *Val) {
     if (!Graph.addNode(Val))
       return;

     if (isa<GlobalValue>(Val))
       Externals.insert(Val);
     else if (auto CExpr = dyn_cast<ConstantExpr>(Val))
       if (hasUsefulEdges(CExpr))
         visitConstantExpr(CExpr);
   }

   void addNodeWithAttr(Value *Val, StratifiedAttrs Attr) {
     addNode(Val);
     Graph.addAttr(Val, Attr);
   }

   void addEdge(Value *From, Value *To, EdgeType Type) {
     if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
       return;
     addNode(From);
     if (To != From)
       addNode(To);
     Graph.addEdge(From, To, Type);
   }

 public:
   GetEdgesVisitor(CFLAAResult &AA, const TargetLibraryInfo &TLI,
                   CFLGraph &Graph, SmallVectorImpl<Value *> &ReturnValues,
                   SmallPtrSetImpl<Value *> &Externals,
                   SmallPtrSetImpl<Value *> &Escapes,
                   SmallVectorImpl<InterprocEdge> &InterprocEdges,
                   SmallVectorImpl<InterprocAttr> &InterprocAttrs)
       : AA(AA), TLI(TLI), Graph(Graph), ReturnValues(ReturnValues),
         Externals(Externals), Escapes(Escapes), InterprocEdges(InterprocEdges),
         InterprocAttrs(InterprocAttrs) {}

   void visitInstruction(Instruction &) {
     llvm_unreachable("Unsupported instruction encountered");
   }

   void visitReturnInst(ReturnInst &Inst) {
     if (auto RetVal = Inst.getReturnValue()) {
       if (RetVal->getType()->isPointerTy()) {
         addNode(RetVal);
         ReturnValues.push_back(RetVal);
       }
     }
   }

   void visitPtrToIntInst(PtrToIntInst &Inst) {
     auto *Ptr = Inst.getOperand(0);
     addNodeWithAttr(Ptr, AttrEscaped);
   }

   void visitIntToPtrInst(IntToPtrInst &Inst) {
     auto *Ptr = &Inst;
     addNodeWithAttr(Ptr, AttrUnknown);
   }

   void visitCastInst(CastInst &Inst) {
     auto *Src = Inst.getOperand(0);
     addEdge(Src, &Inst, EdgeType::Assign);
   }

   void visitBinaryOperator(BinaryOperator &Inst) {
     auto *Op1 = Inst.getOperand(0);
     auto *Op2 = Inst.getOperand(1);
     addEdge(Op1, &Inst, EdgeType::Assign);
     addEdge(Op2, &Inst, EdgeType::Assign);
   }

   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
     auto *Ptr = Inst.getPointerOperand();
     auto *Val = Inst.getNewValOperand();
     addEdge(Ptr, Val, EdgeType::Dereference);
   }

   void visitAtomicRMWInst(AtomicRMWInst &Inst) {
     auto *Ptr = Inst.getPointerOperand();
     auto *Val = Inst.getValOperand();
     addEdge(Ptr, Val, EdgeType::Dereference);
   }

   void visitPHINode(PHINode &Inst) {
     for (Value *Val : Inst.incoming_values())
       addEdge(Val, &Inst, EdgeType::Assign);
   }

   void visitGetElementPtrInst(GetElementPtrInst &Inst) {
     auto *Op = Inst.getPointerOperand();
     addEdge(Op, &Inst, EdgeType::Assign);
   }

   void visitSelectInst(SelectInst &Inst) {
     // Condition is not processed here (The actual statement producing
     // the condition result is processed elsewhere). For select, the
     // condition is evaluated, but not loaded, stored, or assigned
     // simply as a result of being the condition of a select.

     auto *TrueVal = Inst.getTrueValue();
     auto *FalseVal = Inst.getFalseValue();
     addEdge(TrueVal, &Inst, EdgeType::Assign);
     addEdge(FalseVal, &Inst, EdgeType::Assign);
   }

   void visitAllocaInst(AllocaInst &Inst) { Graph.addNode(&Inst); }

   void visitLoadInst(LoadInst &Inst) {
     auto *Ptr = Inst.getPointerOperand();
     auto *Val = &Inst;
     addEdge(Val, Ptr, EdgeType::Reference);
   }

   void visitStoreInst(StoreInst &Inst) {
     auto *Ptr = Inst.getPointerOperand();
     auto *Val = Inst.getValueOperand();
     addEdge(Ptr, Val, EdgeType::Dereference);
   }

   void visitVAArgInst(VAArgInst &Inst) {
     // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it does
     // two things:
     //  1. Loads a value from *((T*)*Ptr).
     //  2. Increments (stores to) *Ptr by some target-specific amount.
     // For now, we'll handle this like a landingpad instruction (by placing the
     // result in its own group, and having that group alias externals).
     addNodeWithAttr(&Inst, AttrUnknown);
   }

   static bool isFunctionExternal(Function *Fn) {
     return !Fn->hasExactDefinition();
   }

   bool tryInterproceduralAnalysis(CallSite CS,
                                   const SmallVectorImpl<Function *> &Fns) {
     assert(Fns.size() > 0);

     if (CS.arg_size() > MaxSupportedArgsInSummary)
       return false;

     // Exit early if we'll fail anyway
     for (auto *Fn : Fns) {
       if (isFunctionExternal(Fn) || Fn->isVarArg())
         return false;
       // Fail if the caller does not provide enough arguments
       assert(Fn->arg_size() <= CS.arg_size());
       auto &MaybeInfo = AA.ensureCached(Fn);
       if (!MaybeInfo.hasValue())
         return false;
     }

     auto InstantiateInterfaceIndex = [&CS](unsigned Index) {
       auto Value =
           (Index == 0) ? CS.getInstruction() : CS.getArgument(Index - 1);
       return Value->getType()->isPointerTy() ? Value : nullptr;
     };

     for (auto *Fn : Fns) {
       auto &FnInfo = AA.ensureCached(Fn);
       assert(FnInfo.hasValue());

       auto &RetParamRelations = FnInfo->getRetParamRelations();
       for (auto &Relation : RetParamRelations) {
         auto FromVal = InstantiateInterfaceIndex(Relation.From.Index);
         auto ToVal = InstantiateInterfaceIndex(Relation.To.Index);
         if (FromVal && ToVal) {
           auto FromLevel = Relation.From.DerefLevel;
           auto ToLevel = Relation.To.DerefLevel;
           InterprocEdges.push_back(
               InterprocEdge{InterprocNode{FromVal, FromLevel},
                             InterprocNode{ToVal, ToLevel}});
         }
       }

       auto &RetParamAttributes = FnInfo->getRetParamAttributes();
       for (auto &Attribute : RetParamAttributes) {
         if (auto Val = InstantiateInterfaceIndex(Attribute.IValue.Index)) {
           InterprocAttrs.push_back(InterprocAttr{
               InterprocNode{Val, Attribute.IValue.DerefLevel}, Attribute.Attr});
         }
       }
     }

     return true;
   }

   void visitCallSite(CallSite CS) {
     auto Inst = CS.getInstruction();

     // Make sure all arguments and return value are added to the graph first
     for (Value *V : CS.args())
       addNode(V);
     if (Inst->getType()->isPointerTy())
       addNode(Inst);

     // Check if Inst is a call to a library function that allocates/deallocates
     // on the heap. Those kinds of functions do not introduce any aliases.
     // TODO: address other common library functions such as realloc(), strdup(),
     // etc.
     if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
         isFreeCall(Inst, &TLI))
       return;

     // TODO: Add support for noalias args/all the other fun function attributes
     // that we can tack on.
     SmallVector<Function *, 4> Targets;
     if (getPossibleTargets(CS, Targets))
       if (tryInterproceduralAnalysis(CS, Targets))
         return;

     // Because the function is opaque, we need to note that anything
     // could have happened to the arguments (unless the function is marked
     // readonly or readnone), and that the result could alias just about
     // anything, too (unless the result is marked noalias).
     if (!CS.onlyReadsMemory())
       for (Value *V : CS.args()) {
         if (V->getType()->isPointerTy())
           Escapes.insert(V);
       }

     if (Inst->getType()->isPointerTy()) {
       auto *Fn = CS.getCalledFunction();
       if (Fn == nullptr || !Fn->doesNotAlias(0))
         Graph.addAttr(Inst, AttrUnknown);
     }
   }

   /// Because vectors/aggregates are immutable and unaddressable, there's
   /// nothing we can do to coax a value out of them, other than calling
   /// Extract{Element,Value}. We can effectively treat them as pointers to
   /// arbitrary memory locations we can store in and load from.
   void visitExtractElementInst(ExtractElementInst &Inst) {
     auto *Ptr = Inst.getVectorOperand();
     auto *Val = &Inst;
     addEdge(Val, Ptr, EdgeType::Reference);
   }

   void visitInsertElementInst(InsertElementInst &Inst) {
     auto *Vec = Inst.getOperand(0);
     auto *Val = Inst.getOperand(1);
     addEdge(Vec, &Inst, EdgeType::Assign);
     addEdge(&Inst, Val, EdgeType::Dereference);
   }

   void visitLandingPadInst(LandingPadInst &Inst) {
     // Exceptions come from "nowhere", from our analysis' perspective.
     // So we place the instruction its own group, noting that said group may
     // alias externals
     addNodeWithAttr(&Inst, AttrUnknown);
   }

   void visitInsertValueInst(InsertValueInst &Inst) {
     auto *Agg = Inst.getOperand(0);
     auto *Val = Inst.getOperand(1);
     addEdge(Agg, &Inst, EdgeType::Assign);
     addEdge(&Inst, Val, EdgeType::Dereference);
   }

   void visitExtractValueInst(ExtractValueInst &Inst) {
     auto *Ptr = Inst.getAggregateOperand();
     addEdge(&Inst, Ptr, EdgeType::Reference);
   }

   void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
     auto *From1 = Inst.getOperand(0);
     auto *From2 = Inst.getOperand(1);
     addEdge(From1, &Inst, EdgeType::Assign);
     addEdge(From2, &Inst, EdgeType::Assign);
   }

   void visitConstantExpr(ConstantExpr *CE) {
     switch (CE->getOpcode()) {
     default:
       llvm_unreachable("Unknown instruction type encountered!");
 // Build the switch statement using the Instruction.def file.
 #define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
   case Instruction::OPCODE:                                                    \
     visit##OPCODE(*(CLASS *)CE);                                               \
     break;
 #include "llvm/IR/Instruction.def"
     }
   }
 };

 class CFLGraphBuilder {
   // Input of the builder
   CFLAAResult &Analysis;
   const TargetLibraryInfo &TLI;

   // Output of the builder
   CFLGraph Graph;
   SmallVector<Value *, 4> ReturnedValues;

   // Auxiliary structures used by the builder
   SmallPtrSet<Value *, 8> ExternalValues;
   SmallPtrSet<Value *, 8> EscapedValues;
   SmallVector<InterprocEdge, 8> InterprocEdges;
   SmallVector<InterprocAttr, 8> InterprocAttrs;

   // Helper functions

   // Determines whether or not we an instruction is useless to us (e.g.
   // FenceInst)
   static bool hasUsefulEdges(Instruction *Inst) {
     bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
                                     !isa<InvokeInst>(Inst) &&
                                     !isa<ReturnInst>(Inst);
     return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
            !IsNonInvokeRetTerminator;
   }

   void addArgumentToGraph(Argument &Arg) {
     if (Arg.getType()->isPointerTy()) {
       Graph.addNode(&Arg);
       ExternalValues.insert(&Arg);
     }
   }

   // Given an Instruction, this will add it to the graph, along with any
   // Instructions that are potentially only available from said Instruction
   // For example, given the following line:
   //   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
   // addInstructionToGraph would add both the `load` and `getelementptr`
   // instructions to the graph appropriately.
   void addInstructionToGraph(Instruction &Inst) {
     if (!hasUsefulEdges(&Inst))
       return;

     GetEdgesVisitor(Analysis, TLI, Graph, ReturnedValues, ExternalValues,
                     EscapedValues, InterprocEdges, InterprocAttrs)
         .visit(Inst);
   }

   // Builds the graph needed for constructing the StratifiedSets for the given
   // function
   void buildGraphFrom(Function &Fn) {
     for (auto &Bb : Fn.getBasicBlockList())
       for (auto &Inst : Bb.getInstList())
         addInstructionToGraph(Inst);

     for (auto &Arg : Fn.args())
       addArgumentToGraph(Arg);
   }

 public:
   CFLGraphBuilder(CFLAAResult &Analysis, const TargetLibraryInfo &TLI,
                   Function &Fn)
       : Analysis(Analysis), TLI(TLI) {
     buildGraphFrom(Fn);
   }

   const CFLGraph &getCFLGraph() const { return Graph; }
   const SmallVector<Value *, 4> &getReturnValues() const {
     return ReturnedValues;
   }
   const SmallPtrSet<Value *, 8> &getExternalValues() const {
     return ExternalValues;
   }
   const SmallPtrSet<Value *, 8> &getEscapedValues() const {
     return EscapedValues;
   }
   const SmallVector<InterprocEdge, 8> &getInterprocEdges() const {
     return InterprocEdges;
   }
   const SmallVector<InterprocAttr, 8> &getInterprocAttrs() const {
     return InterprocAttrs;
   }
 };
 }

 //===----------------------------------------------------------------------===//
 // Function declarations that require types defined in the namespace above
 //===----------------------------------------------------------------------===//

 /// Given a StratifiedAttrs, returns true if it marks the corresponding values
 /// as globals or arguments
 static bool isGlobalOrArgAttr(StratifiedAttrs Attr);

 /// Given a StratifiedAttrs, returns true if the corresponding values come from
 /// an unknown source (such as opaque memory or an integer cast)
 static bool isUnknownAttr(StratifiedAttrs Attr);

 /// Given an argument number, returns the appropriate StratifiedAttr to set.
 static StratifiedAttrs argNumberToAttr(unsigned ArgNum);

 /// Given a Value, potentially return which StratifiedAttr it maps to.
 static Optional<StratifiedAttrs> valueToAttr(Value *Val);

 /// Gets the "Level" that one should travel in StratifiedSets
 /// given an EdgeType.
 static Level directionOfEdgeType(EdgeType);

 /// Determines whether it would be pointless to add the given Value to our sets.
 static bool canSkipAddingToSets(Value *Val);

 static Optional<Function *> parentFunctionOfValue(Value *Val) {
   if (auto *Inst = dyn_cast<Instruction>(Val)) {
     auto *Bb = Inst->getParent();
     return Bb->getParent();
   }

   if (auto *Arg = dyn_cast<Argument>(Val))
     return Arg->getParent();
   return None;
 }

 static bool getPossibleTargets(CallSite CS,
                                SmallVectorImpl<Function *> &Output) {
   if (auto *Fn = CS.getCalledFunction()) {
     Output.push_back(Fn);
     return true;
   }

   // TODO: If the call is indirect, we might be able to enumerate all potential
   // targets of the call and return them, rather than just failing.
   return false;
 }

 static bool isGlobalOrArgAttr(StratifiedAttrs Attr) {
   return Attr.reset(AttrEscapedIndex)
       .reset(AttrUnknownIndex)
       .reset(AttrCallerIndex)
       .any();
 }

 static bool isUnknownAttr(StratifiedAttrs Attr) {
   return Attr.test(AttrUnknownIndex) || Attr.test(AttrCallerIndex);
 }

 static Optional<StratifiedAttrs> valueToAttr(Value *Val) {
   if (isa<GlobalValue>(Val))
     return StratifiedAttrs(AttrGlobal);

   if (auto *Arg = dyn_cast<Argument>(Val))
     // Only pointer arguments should have the argument attribute,
     // because things can't escape through scalars without us seeing a
     // cast, and thus, interaction with them doesn't matter.
     if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
       return argNumberToAttr(Arg->getArgNo());
   return None;
 }

 static StratifiedAttrs argNumberToAttr(unsigned ArgNum) {
   if (ArgNum >= AttrMaxNumArgs)
     return AttrUnknown;
   // N.B. MSVC complains if we use `1U` here, since StratifiedAttrs' ctor takes
   // an unsigned long long.
   return StratifiedAttrs(1ULL << (ArgNum + AttrFirstArgIndex));
 }

 static Level directionOfEdgeType(EdgeType Weight) {
   switch (Weight) {
   case EdgeType::Reference:
     return Level::Above;
   case EdgeType::Dereference:
     return Level::Below;
   case EdgeType::Assign:
     return Level::Same;
   }
   llvm_unreachable("Incomplete switch coverage");
 }

 static bool canSkipAddingToSets(Value *Val) {
   // Constants can share instances, which may falsely unify multiple
   // sets, e.g. in
   // store i32* null, i32** %ptr1
   // store i32* null, i32** %ptr2
   // clearly ptr1 and ptr2 should not be unified into the same set, so
   // we should filter out the (potentially shared) instance to
   // i32* null.
   if (isa<Constant>(Val)) {
     // TODO: Because all of these things are constant, we can determine whether
     // the data is *actually* mutable at graph building time. This will probably
     // come for free/cheap with offset awareness.
     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
                                isa<ConstantExpr>(Val) ||
                                isa<ConstantAggregate>(Val);
     return !CanStoreMutableData;
   }

   return false;
 }

 CFLAAResult::FunctionInfo::FunctionInfo(Function &Fn,
                                         const SmallVectorImpl<Value *> &RetVals,
                                         StratifiedSets<Value *> S)
     : Sets(std::move(S)) {
   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
   // to remove this if it doesn't really matter in practice.
   if (Fn.arg_size() > MaxSupportedArgsInSummary)
     return;

   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;

   // Our intention here is to record all InterfaceValues that share the same
   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
   // have its StratifiedIndex scanned here and check if the index is presented
   // in InterfaceMap: if it is not, we add the correspondence to the map;
   // otherwise, an aliasing relation is found and we add it to
   // RetParamRelations.

   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
                                     StratifiedIndex SetIndex) {
     unsigned Level = 0;
     while (true) {
       InterfaceValue CurrValue{InterfaceIndex, Level};

       auto Itr = InterfaceMap.find(SetIndex);
       if (Itr != InterfaceMap.end()) {
         if (CurrValue != Itr->second)
           RetParamRelations.push_back(ExternalRelation{CurrValue, Itr->second});
         break;
       }

       auto &Link = Sets.getLink(SetIndex);
       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
       auto ExternalAttrs = Link.Attrs & StratifiedAttrs(ExternalAttrMask);
       if (ExternalAttrs.any())
         RetParamAttributes.push_back(
             ExternalAttribute{CurrValue, ExternalAttrs});

       if (!Link.hasBelow())
         break;

       ++Level;
       SetIndex = Link.Below;
     }
   };

   // Populate RetParamRelations for return values
   for (auto *RetVal : RetVals) {
     assert(RetVal != nullptr);
     assert(RetVal->getType()->isPointerTy());
     auto RetInfo = Sets.find(RetVal);
     if (RetInfo.hasValue())
       AddToRetParamRelations(0, RetInfo->Index);
   }

   // Populate RetParamRelations for parameters
   unsigned I = 0;
   for (auto &Param : Fn.args()) {
     if (Param.getType()->isPointerTy()) {
       auto ParamInfo = Sets.find(&Param);
       if (ParamInfo.hasValue())
         AddToRetParamRelations(I + 1, ParamInfo->Index);
     }
     ++I;
   }
 }

 // Builds the graph + StratifiedSets for a function.
 CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) {
   CFLGraphBuilder GraphBuilder(*this, TLI, *Fn);
   StratifiedSetsBuilder<Value *> SetBuilder;

   auto &Graph = GraphBuilder.getCFLGraph();
   SmallVector<Value *, 16> Worklist;
   for (auto Node : Graph.nodes())
     Worklist.push_back(Node);

   while (!Worklist.empty()) {
     auto *CurValue = Worklist.pop_back_val();
     SetBuilder.add(CurValue);
     if (canSkipAddingToSets(CurValue))
       continue;

     auto Attr = Graph.attrFor(CurValue);
     SetBuilder.noteAttributes(CurValue, Attr);

     for (const auto &Edge : Graph.edgesFor(CurValue)) {
       auto Label = Edge.Type;
       auto *OtherValue = Edge.Other;

       if (canSkipAddingToSets(OtherValue))
         continue;

       bool Added;
       switch (directionOfEdgeType(Label)) {
       case Level::Above:
         Added = SetBuilder.addAbove(CurValue, OtherValue);
         break;
       case Level::Below:
         Added = SetBuilder.addBelow(CurValue, OtherValue);
         break;
       case Level::Same:
         Added = SetBuilder.addWith(CurValue, OtherValue);
         break;
       }

       if (Added)
         Worklist.push_back(OtherValue);
     }
   }

   // Special handling for globals and arguments
   for (auto *External : GraphBuilder.getExternalValues()) {
     SetBuilder.add(External);
     auto Attr = valueToAttr(External);
     if (Attr.hasValue()) {
       SetBuilder.noteAttributes(External, *Attr);
       if (*Attr == AttrGlobal)
         SetBuilder.addAttributesBelow(External, 1, AttrUnknown);
       else
         SetBuilder.addAttributesBelow(External, 1, AttrCaller);
     }
   }

   // Special handling for interprocedural aliases
   for (auto &Edge : GraphBuilder.getInterprocEdges()) {
     auto FromVal = Edge.From.Val;
     auto ToVal = Edge.To.Val;
     SetBuilder.add(FromVal);
     SetBuilder.add(ToVal);
     SetBuilder.addBelowWith(FromVal, Edge.From.DerefLevel, ToVal,
                             Edge.To.DerefLevel);
   }

   // Special handling for interprocedural attributes
   for (auto &IPAttr : GraphBuilder.getInterprocAttrs()) {
     auto Val = IPAttr.Node.Val;
     SetBuilder.add(Val);
     SetBuilder.addAttributesBelow(Val, IPAttr.Node.DerefLevel, IPAttr.Attr);
   }

   // Special handling for opaque external functions
   for (auto *Escape : GraphBuilder.getEscapedValues()) {
     SetBuilder.add(Escape);
     SetBuilder.noteAttributes(Escape, AttrEscaped);
     SetBuilder.addAttributesBelow(Escape, 1, AttrUnknown);
   }

   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
 }

 void CFLAAResult::scan(Function *Fn) {
   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
   (void)InsertPair;
   assert(InsertPair.second &&
          "Trying to scan a function that has already been cached");

   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
   // may get evaluated after operator[], potentially triggering a DenseMap
   // resize and invalidating the reference returned by operator[]
   auto FunInfo = buildSetsFrom(Fn);
   Cache[Fn] = std::move(FunInfo);

   Handles.push_front(FunctionHandle(Fn, this));
 }

 void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); }

 /// Ensures that the given function is available in the cache, and returns the
 /// entry.
 const Optional<CFLAAResult::FunctionInfo> &
 CFLAAResult::ensureCached(Function *Fn) {
   auto Iter = Cache.find(Fn);
   if (Iter == Cache.end()) {
     scan(Fn);
     Iter = Cache.find(Fn);
     assert(Iter != Cache.end());
     assert(Iter->second.hasValue());
   }
   return Iter->second;
 }

 AliasResult CFLAAResult::query(const MemoryLocation &LocA,
                                const MemoryLocation &LocB) {
   auto *ValA = const_cast<Value *>(LocA.Ptr);
   auto *ValB = const_cast<Value *>(LocB.Ptr);

   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
     return NoAlias;

   Function *Fn = nullptr;
   auto MaybeFnA = parentFunctionOfValue(ValA);
   auto MaybeFnB = parentFunctionOfValue(ValB);
   if (!MaybeFnA.hasValue() && !MaybeFnB.hasValue()) {
     // The only times this is known to happen are when globals + InlineAsm are
     // involved
     DEBUG(dbgs() << "CFLAA: could not extract parent function information.\n");
     return MayAlias;
   }

   if (MaybeFnA.hasValue()) {
     Fn = *MaybeFnA;
     assert((!MaybeFnB.hasValue() || *MaybeFnB == *MaybeFnA) &&
            "Interprocedural queries not supported");
   } else {
     Fn = *MaybeFnB;
   }

   assert(Fn != nullptr);
   auto &MaybeInfo = ensureCached(Fn);
   assert(MaybeInfo.hasValue());

   auto &Sets = MaybeInfo->getStratifiedSets();
   auto MaybeA = Sets.find(ValA);
   if (!MaybeA.hasValue())
     return MayAlias;

   auto MaybeB = Sets.find(ValB);
   if (!MaybeB.hasValue())
     return MayAlias;

   auto SetA = *MaybeA;
   auto SetB = *MaybeB;
   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
   auto AttrsB = Sets.getLink(SetB.Index).Attrs;

   // If both values are local (meaning the corresponding set has attribute
   // AttrNone or AttrEscaped), then we know that CFLAA fully models them: they
   // may-alias each other if and only if they are in the same set
   // If at least one value is non-local (meaning it either is global/argument or
   // it comes from unknown sources like integer cast), the situation becomes a
   // bit more interesting. We follow three general rules described below:
   // - Non-local values may alias each other
   // - AttrNone values do not alias any non-local values
   // - AttrEscaped do not alias globals/arguments, but they may alias
   // AttrUnknown values
   if (SetA.Index == SetB.Index)
     return MayAlias;
   if (AttrsA.none() || AttrsB.none())
     return NoAlias;
   if (isUnknownAttr(AttrsA) || isUnknownAttr(AttrsB))
     return MayAlias;
   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
     return MayAlias;
   return NoAlias;
 }

 ModRefInfo CFLAAResult::getArgModRefInfo(ImmutableCallSite CS,
                                          unsigned ArgIdx) {
   if (auto CalledFunc = CS.getCalledFunction()) {
     auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc));
     if (!MaybeInfo.hasValue())
       return MRI_ModRef;
     auto &RetParamAttributes = MaybeInfo->getRetParamAttributes();
     auto &RetParamRelations = MaybeInfo->getRetParamRelations();

     bool ArgAttributeIsWritten =
         std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(),
                     [ArgIdx](const ExternalAttribute &ExtAttr) {
                       return ExtAttr.IValue.Index == ArgIdx + 1;
                     });
     bool ArgIsAccessed =
         std::any_of(RetParamRelations.begin(), RetParamRelations.end(),
                     [ArgIdx](const ExternalRelation &ExtRelation) {
                       return ExtRelation.To.Index == ArgIdx + 1 ||
                              ExtRelation.From.Index == ArgIdx + 1;
                     });

     return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef
                                                       : MRI_ModRef;
   }

   return MRI_ModRef;
 }

 FunctionModRefBehavior CFLAAResult::getModRefBehavior(ImmutableCallSite CS) {
   // If we know the callee, try analyzing it
   if (auto CalledFunc = CS.getCalledFunction())
     return getModRefBehavior(CalledFunc);

   // Otherwise, be conservative
   return FMRB_UnknownModRefBehavior;
 }

 FunctionModRefBehavior CFLAAResult::getModRefBehavior(const Function *F) {
   assert(F != nullptr);

   // TODO: Remove the const_cast
   auto &MaybeInfo = ensureCached(const_cast<Function *>(F));
   if (!MaybeInfo.hasValue())
     return FMRB_UnknownModRefBehavior;
   auto &RetParamAttributes = MaybeInfo->getRetParamAttributes();
   auto &RetParamRelations = MaybeInfo->getRetParamRelations();

   // First, if any argument is marked Escpaed, Unknown or Global, anything may
   // happen to them and thus we can't draw any conclusion.
   if (!RetParamAttributes.empty())
     return FMRB_UnknownModRefBehavior;

   // Currently we don't (and can't) distinguish reads from writes in
   // RetParamRelations. All we can say is whether there may be memory access or
   // not.
   if (RetParamRelations.empty())
     return FMRB_DoesNotAccessMemory;

   // Check if something beyond argmem gets touched.
   bool AccessArgMemoryOnly =
       std::all_of(RetParamRelations.begin(), RetParamRelations.end(),
                   [](const ExternalRelation &ExtRelation) {
                     // Both DerefLevels has to be 0, since we don't know which
                     // one is a read and which is a write.
                     return ExtRelation.From.DerefLevel == 0 &&
                            ExtRelation.To.DerefLevel == 0;
                   });
   return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees
                              : FMRB_UnknownModRefBehavior;
 }

 char CFLAA::PassID;

 CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> &AM) {
   return CFLAAResult(AM.getResult<TargetLibraryAnalysis>(F));
 }

 char CFLAAWrapperPass::ID = 0;
 INITIALIZE_PASS(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", false,
                 true)

 ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); }

 CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) {
   initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry());
 }

 void CFLAAWrapperPass::initializePass() {
   auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
   Result.reset(new CFLAAResult(TLIWP.getTLI()));
 }

 void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<TargetLibraryInfoWrapperPass>();
 }