blob: cfa5f542cfe6f6d3da85e12c711e9e952b007b61 [file] [log] [blame]
George Burgess IV1ca8aff2016-07-06 00:36:12 +00001//======- CFLGraph.h - Abstract stratified sets implementation. --------======//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This file defines CFLGraph, an auxiliary data structure used by CFL-based
11/// alias analysis.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ANALYSIS_CFLGRAPH_H
16#define LLVM_ANALYSIS_CFLGRAPH_H
17
George Burgess IVe1919962016-07-06 00:47:21 +000018#include "AliasAnalysisSummary.h"
19#include "llvm/ADT/STLExtras.h"
George Burgess IVc294d0d2016-07-09 02:54:42 +000020#include "llvm/Analysis/MemoryBuiltins.h"
21#include "llvm/IR/InstVisitor.h"
22#include "llvm/IR/Instructions.h"
George Burgess IV1ca8aff2016-07-06 00:36:12 +000023
24namespace llvm {
George Burgess IV1ca8aff2016-07-06 00:36:12 +000025namespace cflaa {
George Burgess IV1ca8aff2016-07-06 00:36:12 +000026/// Edges can be one of four "weights" -- each weight must have an inverse
27/// weight (Assign has Assign; Reference has Dereference).
28enum class EdgeType {
29 /// The weight assigned when assigning from or to a value. For example, in:
30 /// %b = getelementptr %a, 0
31 /// ...The relationships are %b assign %a, and %a assign %b. This used to be
32 /// two edges, but having a distinction bought us nothing.
33 Assign,
34
35 /// The edge used when we have an edge going from some handle to a Value.
36 /// Examples of this include:
37 /// %b = load %a (%b Dereference %a)
38 /// %b = extractelement %a, 0 (%a Dereference %b)
39 Dereference,
40
41 /// The edge used when our edge goes from a value to a handle that may have
42 /// contained it at some point. Examples:
43 /// %b = load %a (%a Reference %b)
44 /// %b = extractelement %a, 0 (%b Reference %a)
45 Reference
46};
47
48/// \brief The Program Expression Graph (PEG) of CFL analysis
49/// CFLGraph is auxiliary data structure used by CFL-based alias analysis to
50/// describe flow-insensitive pointer-related behaviors. Given an LLVM function,
51/// the main purpose of this graph is to abstract away unrelated facts and
52/// translate the rest into a form that can be easily digested by CFL analyses.
53class CFLGraph {
54 typedef Value *Node;
55
56 struct Edge {
57 EdgeType Type;
58 Node Other;
59 };
60
61 typedef std::vector<Edge> EdgeList;
62
63 struct NodeInfo {
64 EdgeList Edges;
George Burgess IVe1919962016-07-06 00:47:21 +000065 AliasAttrs Attr;
George Burgess IV1ca8aff2016-07-06 00:36:12 +000066 };
67
68 typedef DenseMap<Node, NodeInfo> NodeMap;
69 NodeMap NodeImpls;
70
71 // Gets the inverse of a given EdgeType.
72 static EdgeType flipWeight(EdgeType Initial) {
73 switch (Initial) {
74 case EdgeType::Assign:
75 return EdgeType::Assign;
76 case EdgeType::Dereference:
77 return EdgeType::Reference;
78 case EdgeType::Reference:
79 return EdgeType::Dereference;
80 }
81 llvm_unreachable("Incomplete coverage of EdgeType enum");
82 }
83
84 const NodeInfo *getNode(Node N) const {
85 auto Itr = NodeImpls.find(N);
86 if (Itr == NodeImpls.end())
87 return nullptr;
88 return &Itr->second;
89 }
90 NodeInfo *getNode(Node N) {
91 auto Itr = NodeImpls.find(N);
92 if (Itr == NodeImpls.end())
93 return nullptr;
94 return &Itr->second;
95 }
96
97 static Node nodeDeref(const NodeMap::value_type &P) { return P.first; }
98 typedef std::pointer_to_unary_function<const NodeMap::value_type &, Node>
99 NodeDerefFun;
100
101public:
102 typedef EdgeList::const_iterator const_edge_iterator;
103 typedef mapped_iterator<NodeMap::const_iterator, NodeDerefFun>
104 const_node_iterator;
105
106 bool addNode(Node N) {
George Burgess IVe1919962016-07-06 00:47:21 +0000107 return NodeImpls
108 .insert(std::make_pair(N, NodeInfo{EdgeList(), getAttrNone()}))
109 .second;
George Burgess IV1ca8aff2016-07-06 00:36:12 +0000110 }
111
George Burgess IVe1919962016-07-06 00:47:21 +0000112 void addAttr(Node N, AliasAttrs Attr) {
George Burgess IV1ca8aff2016-07-06 00:36:12 +0000113 auto *Info = getNode(N);
114 assert(Info != nullptr);
115 Info->Attr |= Attr;
116 }
117
118 void addEdge(Node From, Node To, EdgeType Type) {
119 auto *FromInfo = getNode(From);
120 assert(FromInfo != nullptr);
121 auto *ToInfo = getNode(To);
122 assert(ToInfo != nullptr);
123
124 FromInfo->Edges.push_back(Edge{Type, To});
125 ToInfo->Edges.push_back(Edge{flipWeight(Type), From});
126 }
127
George Burgess IVe1919962016-07-06 00:47:21 +0000128 AliasAttrs attrFor(Node N) const {
George Burgess IV1ca8aff2016-07-06 00:36:12 +0000129 auto *Info = getNode(N);
130 assert(Info != nullptr);
131 return Info->Attr;
132 }
133
134 iterator_range<const_edge_iterator> edgesFor(Node N) const {
135 auto *Info = getNode(N);
136 assert(Info != nullptr);
137 auto &Edges = Info->Edges;
138 return make_range(Edges.begin(), Edges.end());
139 }
140
141 iterator_range<const_node_iterator> nodes() const {
142 return make_range<const_node_iterator>(
143 map_iterator(NodeImpls.begin(), NodeDerefFun(nodeDeref)),
144 map_iterator(NodeImpls.end(), NodeDerefFun(nodeDeref)));
145 }
146
147 bool empty() const { return NodeImpls.empty(); }
148 std::size_t size() const { return NodeImpls.size(); }
149};
George Burgess IVc294d0d2016-07-09 02:54:42 +0000150
151///\brief A builder class used to create CFLGraph instance from a given function
152/// The CFL-AA that uses this builder must provide its own type as a template
153/// argument. This is necessary for interprocedural processing: CFLGraphBuilder
154/// needs a way of obtaining the summary of other functions when callinsts are
155/// encountered.
156/// As a result, we expect the said CFL-AA to expose a getAliasSummary() public
157/// member function that takes a Function& and returns the corresponding summary
158/// as a const AliasSummary*.
159template <typename CFLAA> class CFLGraphBuilder {
160 // Input of the builder
161 CFLAA &Analysis;
162 const TargetLibraryInfo &TLI;
163
164 // Output of the builder
165 CFLGraph Graph;
166 SmallVector<Value *, 4> ReturnedValues;
167
168 // Auxiliary structures used by the builder
169 SmallVector<InstantiatedRelation, 8> InstantiatedRelations;
170 SmallVector<InstantiatedAttr, 8> InstantiatedAttrs;
171
172 // Helper class
173 /// Gets the edges our graph should have, based on an Instruction*
174 class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
175 CFLAA &AA;
176 const TargetLibraryInfo &TLI;
177
178 CFLGraph &Graph;
179 SmallVectorImpl<Value *> &ReturnValues;
180 SmallVectorImpl<InstantiatedRelation> &InstantiatedRelations;
181 SmallVectorImpl<InstantiatedAttr> &InstantiatedAttrs;
182
183 static bool hasUsefulEdges(ConstantExpr *CE) {
184 // ConstantExpr doesn't have terminators, invokes, or fences, so only
185 // needs
186 // to check for compares.
187 return CE->getOpcode() != Instruction::ICmp &&
188 CE->getOpcode() != Instruction::FCmp;
189 }
190
191 // Returns possible functions called by CS into the given SmallVectorImpl.
192 // Returns true if targets found, false otherwise.
193 static bool getPossibleTargets(CallSite CS,
194 SmallVectorImpl<Function *> &Output) {
195 if (auto *Fn = CS.getCalledFunction()) {
196 Output.push_back(Fn);
197 return true;
198 }
199
200 // TODO: If the call is indirect, we might be able to enumerate all
201 // potential
202 // targets of the call and return them, rather than just failing.
203 return false;
204 }
205
206 void addNode(Value *Val) {
207 assert(Val != nullptr);
208 if (!Graph.addNode(Val))
209 return;
210
211 if (isa<GlobalValue>(Val)) {
212 Graph.addAttr(Val, getGlobalOrArgAttrFromValue(*Val));
213 // Currently we do not attempt to be smart on globals
214 InstantiatedAttrs.push_back(
215 InstantiatedAttr{InstantiatedValue{Val, 1}, getAttrUnknown()});
216 } else if (auto CExpr = dyn_cast<ConstantExpr>(Val))
217 if (hasUsefulEdges(CExpr))
218 visitConstantExpr(CExpr);
219 }
220
221 void addNodeWithAttr(Value *Val, AliasAttrs Attr) {
222 addNode(Val);
223 Graph.addAttr(Val, Attr);
224 }
225
226 void addEdge(Value *From, Value *To, EdgeType Type) {
227 assert(From != nullptr && To != nullptr);
228 if (!From->getType()->isPointerTy() || !To->getType()->isPointerTy())
229 return;
230 addNode(From);
231 if (To != From)
232 addNode(To);
233 Graph.addEdge(From, To, Type);
234 }
235
236 public:
237 GetEdgesVisitor(CFLGraphBuilder &Builder)
238 : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
239 ReturnValues(Builder.ReturnedValues),
240 InstantiatedRelations(Builder.InstantiatedRelations),
241 InstantiatedAttrs(Builder.InstantiatedAttrs) {}
242
243 void visitInstruction(Instruction &) {
244 llvm_unreachable("Unsupported instruction encountered");
245 }
246
247 void visitReturnInst(ReturnInst &Inst) {
248 if (auto RetVal = Inst.getReturnValue()) {
249 if (RetVal->getType()->isPointerTy()) {
250 addNode(RetVal);
251 ReturnValues.push_back(RetVal);
252 }
253 }
254 }
255
256 void visitPtrToIntInst(PtrToIntInst &Inst) {
257 auto *Ptr = Inst.getOperand(0);
258 addNodeWithAttr(Ptr, getAttrEscaped());
259 }
260
261 void visitIntToPtrInst(IntToPtrInst &Inst) {
262 auto *Ptr = &Inst;
263 addNodeWithAttr(Ptr, getAttrUnknown());
264 }
265
266 void visitCastInst(CastInst &Inst) {
267 auto *Src = Inst.getOperand(0);
268 addEdge(Src, &Inst, EdgeType::Assign);
269 }
270
271 void visitBinaryOperator(BinaryOperator &Inst) {
272 auto *Op1 = Inst.getOperand(0);
273 auto *Op2 = Inst.getOperand(1);
274 addEdge(Op1, &Inst, EdgeType::Assign);
275 addEdge(Op2, &Inst, EdgeType::Assign);
276 }
277
278 void visitAtomicCmpXchgInst(AtomicCmpXchgInst &Inst) {
279 auto *Ptr = Inst.getPointerOperand();
280 auto *Val = Inst.getNewValOperand();
281 addEdge(Ptr, Val, EdgeType::Dereference);
282 }
283
284 void visitAtomicRMWInst(AtomicRMWInst &Inst) {
285 auto *Ptr = Inst.getPointerOperand();
286 auto *Val = Inst.getValOperand();
287 addEdge(Ptr, Val, EdgeType::Dereference);
288 }
289
290 void visitPHINode(PHINode &Inst) {
291 for (Value *Val : Inst.incoming_values())
292 addEdge(Val, &Inst, EdgeType::Assign);
293 }
294
295 void visitGetElementPtrInst(GetElementPtrInst &Inst) {
296 auto *Op = Inst.getPointerOperand();
297 addEdge(Op, &Inst, EdgeType::Assign);
298 }
299
300 void visitSelectInst(SelectInst &Inst) {
301 // Condition is not processed here (The actual statement producing
302 // the condition result is processed elsewhere). For select, the
303 // condition is evaluated, but not loaded, stored, or assigned
304 // simply as a result of being the condition of a select.
305
306 auto *TrueVal = Inst.getTrueValue();
307 auto *FalseVal = Inst.getFalseValue();
308 addEdge(TrueVal, &Inst, EdgeType::Assign);
309 addEdge(FalseVal, &Inst, EdgeType::Assign);
310 }
311
312 void visitAllocaInst(AllocaInst &Inst) { Graph.addNode(&Inst); }
313
314 void visitLoadInst(LoadInst &Inst) {
315 auto *Ptr = Inst.getPointerOperand();
316 auto *Val = &Inst;
317 addEdge(Val, Ptr, EdgeType::Reference);
318 }
319
320 void visitStoreInst(StoreInst &Inst) {
321 auto *Ptr = Inst.getPointerOperand();
322 auto *Val = Inst.getValueOperand();
323 addEdge(Ptr, Val, EdgeType::Dereference);
324 }
325
326 void visitVAArgInst(VAArgInst &Inst) {
327 // We can't fully model va_arg here. For *Ptr = Inst.getOperand(0), it
328 // does
329 // two things:
330 // 1. Loads a value from *((T*)*Ptr).
331 // 2. Increments (stores to) *Ptr by some target-specific amount.
332 // For now, we'll handle this like a landingpad instruction (by placing
333 // the
334 // result in its own group, and having that group alias externals).
335 addNodeWithAttr(&Inst, getAttrUnknown());
336 }
337
338 static bool isFunctionExternal(Function *Fn) {
339 return !Fn->hasExactDefinition();
340 }
341
342 bool tryInterproceduralAnalysis(CallSite CS,
343 const SmallVectorImpl<Function *> &Fns) {
344 assert(Fns.size() > 0);
345
346 if (CS.arg_size() > MaxSupportedArgsInSummary)
347 return false;
348
349 // Exit early if we'll fail anyway
350 for (auto *Fn : Fns) {
351 if (isFunctionExternal(Fn) || Fn->isVarArg())
352 return false;
353 // Fail if the caller does not provide enough arguments
354 assert(Fn->arg_size() <= CS.arg_size());
355 if (!AA.getAliasSummary(*Fn))
356 return false;
357 }
358
359 for (auto *Fn : Fns) {
360 auto Summary = AA.getAliasSummary(*Fn);
361 assert(Summary != nullptr);
362
363 auto &RetParamRelations = Summary->RetParamRelations;
364 for (auto &Relation : RetParamRelations) {
365 auto IRelation = instantiateExternalRelation(Relation, CS);
366 if (IRelation.hasValue())
367 InstantiatedRelations.push_back(*IRelation);
368 }
369
370 auto &RetParamAttributes = Summary->RetParamAttributes;
371 for (auto &Attribute : RetParamAttributes) {
372 auto IAttr = instantiateExternalAttribute(Attribute, CS);
373 if (IAttr.hasValue())
374 InstantiatedAttrs.push_back(*IAttr);
375 }
376 }
377
378 return true;
379 }
380
381 void visitCallSite(CallSite CS) {
382 auto Inst = CS.getInstruction();
383
384 // Make sure all arguments and return value are added to the graph first
385 for (Value *V : CS.args())
386 addNode(V);
387 if (Inst->getType()->isPointerTy())
388 addNode(Inst);
389
390 // Check if Inst is a call to a library function that
391 // allocates/deallocates
392 // on the heap. Those kinds of functions do not introduce any aliases.
393 // TODO: address other common library functions such as realloc(),
394 // strdup(),
395 // etc.
396 if (isMallocLikeFn(Inst, &TLI) || isCallocLikeFn(Inst, &TLI) ||
397 isFreeCall(Inst, &TLI))
398 return;
399
400 // TODO: Add support for noalias args/all the other fun function
401 // attributes
402 // that we can tack on.
403 SmallVector<Function *, 4> Targets;
404 if (getPossibleTargets(CS, Targets))
405 if (tryInterproceduralAnalysis(CS, Targets))
406 return;
407
408 // Because the function is opaque, we need to note that anything
409 // could have happened to the arguments (unless the function is marked
410 // readonly or readnone), and that the result could alias just about
411 // anything, too (unless the result is marked noalias).
412 if (!CS.onlyReadsMemory())
413 for (Value *V : CS.args()) {
414 if (V->getType()->isPointerTy()) {
415 // The argument itself escapes.
416 addNodeWithAttr(V, getAttrEscaped());
417 // The fate of argument memory is unknown. Note that since
418 // AliasAttrs
419 // is transitive with respect to dereference, we only need to
420 // specify
421 // it for the first-level memory.
422 InstantiatedAttrs.push_back(
423 InstantiatedAttr{InstantiatedValue{V, 1}, getAttrUnknown()});
424 }
425 }
426
427 if (Inst->getType()->isPointerTy()) {
428 auto *Fn = CS.getCalledFunction();
429 if (Fn == nullptr || !Fn->doesNotAlias(0))
430 // No need to call addNodeWithAttr() since we've added Inst at the
431 // beginning of this function and we know it is not a global.
432 Graph.addAttr(Inst, getAttrUnknown());
433 }
434 }
435
436 /// Because vectors/aggregates are immutable and unaddressable, there's
437 /// nothing we can do to coax a value out of them, other than calling
438 /// Extract{Element,Value}. We can effectively treat them as pointers to
439 /// arbitrary memory locations we can store in and load from.
440 void visitExtractElementInst(ExtractElementInst &Inst) {
441 auto *Ptr = Inst.getVectorOperand();
442 auto *Val = &Inst;
443 addEdge(Val, Ptr, EdgeType::Reference);
444 }
445
446 void visitInsertElementInst(InsertElementInst &Inst) {
447 auto *Vec = Inst.getOperand(0);
448 auto *Val = Inst.getOperand(1);
449 addEdge(Vec, &Inst, EdgeType::Assign);
450 addEdge(&Inst, Val, EdgeType::Dereference);
451 }
452
453 void visitLandingPadInst(LandingPadInst &Inst) {
454 // Exceptions come from "nowhere", from our analysis' perspective.
455 // So we place the instruction its own group, noting that said group may
456 // alias externals
457 addNodeWithAttr(&Inst, getAttrUnknown());
458 }
459
460 void visitInsertValueInst(InsertValueInst &Inst) {
461 auto *Agg = Inst.getOperand(0);
462 auto *Val = Inst.getOperand(1);
463 addEdge(Agg, &Inst, EdgeType::Assign);
464 addEdge(&Inst, Val, EdgeType::Dereference);
465 }
466
467 void visitExtractValueInst(ExtractValueInst &Inst) {
468 auto *Ptr = Inst.getAggregateOperand();
469 addEdge(&Inst, Ptr, EdgeType::Reference);
470 }
471
472 void visitShuffleVectorInst(ShuffleVectorInst &Inst) {
473 auto *From1 = Inst.getOperand(0);
474 auto *From2 = Inst.getOperand(1);
475 addEdge(From1, &Inst, EdgeType::Assign);
476 addEdge(From2, &Inst, EdgeType::Assign);
477 }
478
479 void visitConstantExpr(ConstantExpr *CE) {
480 switch (CE->getOpcode()) {
481 default:
482 llvm_unreachable("Unknown instruction type encountered!");
483// Build the switch statement using the Instruction.def file.
484#define HANDLE_INST(NUM, OPCODE, CLASS) \
485 case Instruction::OPCODE: \
486 this->visit##OPCODE(*(CLASS *)CE); \
487 break;
488#include "llvm/IR/Instruction.def"
489 }
490 }
491 };
492
493 // Helper functions
494
495 // Determines whether or not we an instruction is useless to us (e.g.
496 // FenceInst)
497 static bool hasUsefulEdges(Instruction *Inst) {
498 bool IsNonInvokeRetTerminator = isa<TerminatorInst>(Inst) &&
499 !isa<InvokeInst>(Inst) &&
500 !isa<ReturnInst>(Inst);
501 return !isa<CmpInst>(Inst) && !isa<FenceInst>(Inst) &&
502 !IsNonInvokeRetTerminator;
503 }
504
505 void addArgumentToGraph(Argument &Arg) {
506 if (Arg.getType()->isPointerTy()) {
507 Graph.addNode(&Arg);
508 Graph.addAttr(&Arg, getGlobalOrArgAttrFromValue(Arg));
509 // Pointees of a formal parameter is known to the caller
510 InstantiatedAttrs.push_back(
511 InstantiatedAttr{InstantiatedValue{&Arg, 1}, getAttrCaller()});
512 }
513 }
514
515 // Given an Instruction, this will add it to the graph, along with any
516 // Instructions that are potentially only available from said Instruction
517 // For example, given the following line:
518 // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2
519 // addInstructionToGraph would add both the `load` and `getelementptr`
520 // instructions to the graph appropriately.
521 void addInstructionToGraph(Instruction &Inst) {
522 if (!hasUsefulEdges(&Inst))
523 return;
524
525 GetEdgesVisitor(*this).visit(Inst);
526 }
527
528 // Builds the graph needed for constructing the StratifiedSets for the given
529 // function
530 void buildGraphFrom(Function &Fn) {
531 for (auto &Bb : Fn.getBasicBlockList())
532 for (auto &Inst : Bb.getInstList())
533 addInstructionToGraph(Inst);
534
535 for (auto &Arg : Fn.args())
536 addArgumentToGraph(Arg);
537 }
538
539public:
540 CFLGraphBuilder(CFLAA &Analysis, const TargetLibraryInfo &TLI, Function &Fn)
541 : Analysis(Analysis), TLI(TLI) {
542 buildGraphFrom(Fn);
543 }
544
545 const CFLGraph &getCFLGraph() const { return Graph; }
546 const SmallVector<Value *, 4> &getReturnValues() const {
547 return ReturnedValues;
548 }
549 const SmallVector<InstantiatedRelation, 8> &getInstantiatedRelations() const {
550 return InstantiatedRelations;
551 }
552 const SmallVector<InstantiatedAttr, 8> &getInstantiatedAttrs() const {
553 return InstantiatedAttrs;
554 }
555};
George Burgess IV1ca8aff2016-07-06 00:36:12 +0000556}
557}
558
559#endif