blob: 8b328c221da326d26ac2b3a1522b311c36fadb2a [file] [log] [blame]
Anna Thomas740f5292017-07-05 01:16:29 +00001//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Run a sanity check on the IR to ensure that Safepoints - if they've been
11// inserted - were inserted correctly. In particular, look for use of
12// non-relocated values after a safepoint. It's primary use is to check the
13// correctness of safepoint insertion immediately after insertion, but it can
14// also be used to verify that later transforms have not found a way to break
15// safepoint semenatics.
16//
17// In its current form, this verify checks a property which is sufficient, but
18// not neccessary for correctness. There are some cases where an unrelocated
19// pointer can be used after the safepoint. Consider this example:
20//
21// a = ...
22// b = ...
23// (a',b') = safepoint(a,b)
24// c = cmp eq a b
25// br c, ..., ....
26//
27// Because it is valid to reorder 'c' above the safepoint, this is legal. In
28// practice, this is a somewhat uncommon transform, but CodeGenPrep does create
Anna Thomascace0532017-07-07 13:02:29 +000029// idioms like this. The verifier knows about these cases and avoids reporting
30// false positives.
Anna Thomas740f5292017-07-05 01:16:29 +000031//
32//===----------------------------------------------------------------------===//
33
34#include "llvm/ADT/DenseSet.h"
35#include "llvm/ADT/SetOperations.h"
36#include "llvm/ADT/SetVector.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/Intrinsics.h"
42#include "llvm/IR/IntrinsicInst.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/Value.h"
45#include "llvm/IR/SafepointIRVerifier.h"
46#include "llvm/IR/Statepoint.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/raw_ostream.h"
50
51#define DEBUG_TYPE "safepoint-ir-verifier"
52
53using namespace llvm;
54
55/// This option is used for writing test cases. Instead of crashing the program
56/// when verification fails, report a message to the console (for FileCheck
57/// usage) and continue execution as if nothing happened.
58static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
59 cl::init(false));
60
61static void Verify(const Function &F, const DominatorTree &DT);
62
63struct SafepointIRVerifier : public FunctionPass {
64 static char ID; // Pass identification, replacement for typeid
65 DominatorTree DT;
66 SafepointIRVerifier() : FunctionPass(ID) {
67 initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
68 }
69
70 bool runOnFunction(Function &F) override {
71 DT.recalculate(F);
72 Verify(F, DT);
73 return false; // no modifications
74 }
75
76 void getAnalysisUsage(AnalysisUsage &AU) const override {
77 AU.setPreservesAll();
78 }
79
80 StringRef getPassName() const override { return "safepoint verifier"; }
81};
82
83void llvm::verifySafepointIR(Function &F) {
84 SafepointIRVerifier pass;
85 pass.runOnFunction(F);
86}
87
88char SafepointIRVerifier::ID = 0;
89
90FunctionPass *llvm::createSafepointIRVerifierPass() {
91 return new SafepointIRVerifier();
92}
93
94INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
95 "Safepoint IR Verifier", false, true)
96INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
97 "Safepoint IR Verifier", false, true)
98
99static bool isGCPointerType(Type *T) {
100 if (auto *PT = dyn_cast<PointerType>(T))
101 // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
102 // GC managed heap. We know that a pointer into this heap needs to be
103 // updated and that no other pointer does.
104 return (1 == PT->getAddressSpace());
105 return false;
106}
107
108static bool containsGCPtrType(Type *Ty) {
109 if (isGCPointerType(Ty))
110 return true;
111 if (VectorType *VT = dyn_cast<VectorType>(Ty))
112 return isGCPointerType(VT->getScalarType());
113 if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
114 return containsGCPtrType(AT->getElementType());
115 if (StructType *ST = dyn_cast<StructType>(Ty))
116 return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
117 containsGCPtrType);
118 return false;
119}
120
121// Debugging aid -- prints a [Begin, End) range of values.
122template<typename IteratorTy>
123static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
124 OS << "[ ";
125 while (Begin != End) {
126 OS << **Begin << " ";
127 ++Begin;
128 }
129 OS << "]";
130}
131
132/// The verifier algorithm is phrased in terms of availability. The set of
133/// values "available" at a given point in the control flow graph is the set of
134/// correctly relocated value at that point, and is a subset of the set of
135/// definitions dominating that point.
136
137/// State we compute and track per basic block.
138struct BasicBlockState {
139 // Set of values available coming in, before the phi nodes
140 DenseSet<const Value *> AvailableIn;
141
142 // Set of values available going out
143 DenseSet<const Value *> AvailableOut;
144
145 // AvailableOut minus AvailableIn.
146 // All elements are Instructions
147 DenseSet<const Value *> Contribution;
148
149 // True if this block contains a safepoint and thus AvailableIn does not
150 // contribute to AvailableOut.
151 bool Cleared = false;
152};
153
154
155/// Gather all the definitions dominating the start of BB into Result. This is
156/// simply the Defs introduced by every dominating basic block and the function
157/// arguments.
158static void GatherDominatingDefs(const BasicBlock *BB,
159 DenseSet<const Value *> &Result,
160 const DominatorTree &DT,
161 DenseMap<const BasicBlock *, BasicBlockState *> &BlockMap) {
162 DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
163
164 while (DTN->getIDom()) {
165 DTN = DTN->getIDom();
166 const auto &Defs = BlockMap[DTN->getBlock()]->Contribution;
167 Result.insert(Defs.begin(), Defs.end());
168 // If this block is 'Cleared', then nothing LiveIn to this block can be
169 // available after this block completes. Note: This turns out to be
170 // really important for reducing memory consuption of the initial available
171 // sets and thus peak memory usage by this verifier.
172 if (BlockMap[DTN->getBlock()]->Cleared)
173 return;
174 }
175
176 for (const Argument &A : BB->getParent()->args())
177 if (containsGCPtrType(A.getType()))
178 Result.insert(&A);
179}
180
181/// Model the effect of an instruction on the set of available values.
182static void TransferInstruction(const Instruction &I, bool &Cleared,
183 DenseSet<const Value *> &Available) {
184 if (isStatepoint(I)) {
185 Cleared = true;
186 Available.clear();
187 } else if (containsGCPtrType(I.getType()))
188 Available.insert(&I);
189}
190
191/// Compute the AvailableOut set for BB, based on the
192/// BasicBlockState BBS, which is the BasicBlockState for BB. FirstPass is set
193/// when the verifier runs for the first time computing the AvailableOut set
194/// for BB.
195static void TransferBlock(const BasicBlock *BB,
196 BasicBlockState &BBS, bool FirstPass) {
197
198 const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn;
199 DenseSet<const Value *> &AvailableOut = BBS.AvailableOut;
200
201 if (BBS.Cleared) {
202 // AvailableOut does not change no matter how the input changes, just
203 // leave it be. We need to force this calculation the first time so that
204 // we have a AvailableOut at all.
205 if (FirstPass) {
206 AvailableOut = BBS.Contribution;
207 }
208 } else {
209 // Otherwise, we need to reduce the AvailableOut set by things which are no
210 // longer in our AvailableIn
211 DenseSet<const Value *> Temp = BBS.Contribution;
212 set_union(Temp, AvailableIn);
213 AvailableOut = std::move(Temp);
214 }
215
216 DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
217 PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
218 dbgs() << " to ";
219 PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
220 dbgs() << "\n";);
221}
222
Anna Thomasccce8532017-07-07 00:40:37 +0000223/// A given derived pointer can have multiple base pointers through phi/selects.
224/// This type indicates when the base pointer is exclusively constant
225/// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
226/// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
227/// NonConstant.
228enum BaseType {
229 NonConstant = 1, // Base pointers is not exclusively constant.
230 ExclusivelyNull,
231 ExclusivelySomeConstant // Base pointers for a given derived pointer is from a
232 // set of constants, but they are not exclusively
233 // null.
234};
Anna Thomas740f5292017-07-05 01:16:29 +0000235
Anna Thomasccce8532017-07-07 00:40:37 +0000236/// Return the baseType for Val which states whether Val is exclusively
237/// derived from constant/null, or not exclusively derived from constant.
238/// Val is exclusively derived off a constant base when all operands of phi and
239/// selects are derived off a constant base.
240static enum BaseType getBaseType(const Value *Val) {
Anna Thomas740f5292017-07-05 01:16:29 +0000241
Anna Thomasccce8532017-07-07 00:40:37 +0000242 SmallVector<const Value *, 32> Worklist;
243 DenseSet<const Value *> Visited;
244 bool isExclusivelyDerivedFromNull = true;
245 Worklist.push_back(Val);
246 // Strip through all the bitcasts and geps to get base pointer. Also check for
247 // the exclusive value when there can be multiple base pointers (through phis
248 // or selects).
249 while(!Worklist.empty()) {
250 const Value *V = Worklist.pop_back_val();
251 if (!Visited.insert(V).second)
252 continue;
Anna Thomas740f5292017-07-05 01:16:29 +0000253
Anna Thomasccce8532017-07-07 00:40:37 +0000254 if (const auto *CI = dyn_cast<CastInst>(V)) {
255 Worklist.push_back(CI->stripPointerCasts());
256 continue;
257 }
258 if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
259 Worklist.push_back(GEP->getPointerOperand());
260 continue;
261 }
262 // Push all the incoming values of phi node into the worklist for
263 // processing.
264 if (const auto *PN = dyn_cast<PHINode>(V)) {
265 for (Value *InV: PN->incoming_values())
266 Worklist.push_back(InV);
267 continue;
268 }
269 if (const auto *SI = dyn_cast<SelectInst>(V)) {
270 // Push in the true and false values
271 Worklist.push_back(SI->getTrueValue());
272 Worklist.push_back(SI->getFalseValue());
273 continue;
274 }
275 if (isa<Constant>(V)) {
276 // We found at least one base pointer which is non-null, so this derived
277 // pointer is not exclusively derived from null.
278 if (V != Constant::getNullValue(V->getType()))
279 isExclusivelyDerivedFromNull = false;
280 // Continue processing the remaining values to make sure it's exclusively
281 // constant.
282 continue;
283 }
284 // At this point, we know that the base pointer is not exclusively
285 // constant.
286 return BaseType::NonConstant;
Anna Thomas740f5292017-07-05 01:16:29 +0000287 }
Anna Thomasccce8532017-07-07 00:40:37 +0000288 // Now, we know that the base pointer is exclusively constant, but we need to
289 // differentiate between exclusive null constant and non-null constant.
290 return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
291 : BaseType::ExclusivelySomeConstant;
Anna Thomas740f5292017-07-05 01:16:29 +0000292}
293
294static void Verify(const Function &F, const DominatorTree &DT) {
295 SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
296 DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
297
298 DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n");
299 if (PrintOnly)
300 dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
301
302
303 for (const BasicBlock &BB : F) {
304 BasicBlockState *BBS = new(BSAllocator.Allocate()) BasicBlockState;
305 for (const auto &I : BB)
306 TransferInstruction(I, BBS->Cleared, BBS->Contribution);
307 BlockMap[&BB] = BBS;
308 }
309
310 for (auto &BBI : BlockMap) {
311 GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap);
312 TransferBlock(BBI.first, *BBI.second, true);
313 }
314
315 SetVector<const BasicBlock *> Worklist;
316 for (auto &BBI : BlockMap)
317 Worklist.insert(BBI.first);
318
319 // This loop iterates the AvailableIn and AvailableOut sets to a fixed point.
320 // The AvailableIn and AvailableOut sets decrease as we iterate.
321 while (!Worklist.empty()) {
322 const BasicBlock *BB = Worklist.pop_back_val();
323 BasicBlockState *BBS = BlockMap[BB];
324
325 size_t OldInCount = BBS->AvailableIn.size();
326 for (const BasicBlock *PBB : predecessors(BB))
327 set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut);
328
329 if (OldInCount == BBS->AvailableIn.size())
330 continue;
331
332 assert(OldInCount > BBS->AvailableIn.size() && "invariant!");
333
334 size_t OldOutCount = BBS->AvailableOut.size();
335 TransferBlock(BB, *BBS, false);
336 if (OldOutCount != BBS->AvailableOut.size()) {
337 assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
338 Worklist.insert(succ_begin(BB), succ_end(BB));
339 }
340 }
341
342 // We now have all the information we need to decide if the use of a heap
343 // reference is legal or not, given our safepoint semantics.
344
345 bool AnyInvalidUses = false;
346
347 auto ReportInvalidUse = [&AnyInvalidUses](const Value &V,
348 const Instruction &I) {
349 errs() << "Illegal use of unrelocated value found!\n";
350 errs() << "Def: " << V << "\n";
351 errs() << "Use: " << I << "\n";
352 if (!PrintOnly)
353 abort();
354 AnyInvalidUses = true;
355 };
356
Anna Thomasccce8532017-07-07 00:40:37 +0000357 auto isNotExclusivelyConstantDerived = [](const Value *V) {
358 return getBaseType(V) == BaseType::NonConstant;
359 };
360
Anna Thomas740f5292017-07-05 01:16:29 +0000361 for (const BasicBlock &BB : F) {
362 // We destructively modify AvailableIn as we traverse the block instruction
363 // by instruction.
364 DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn;
365 for (const Instruction &I : BB) {
366 if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
367 if (containsGCPtrType(PN->getType()))
368 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
369 const BasicBlock *InBB = PN->getIncomingBlock(i);
370 const Value *InValue = PN->getIncomingValue(i);
371
Anna Thomasccce8532017-07-07 00:40:37 +0000372 if (isNotExclusivelyConstantDerived(InValue) &&
Anna Thomas740f5292017-07-05 01:16:29 +0000373 !BlockMap[InBB]->AvailableOut.count(InValue))
374 ReportInvalidUse(*InValue, *PN);
375 }
Anna Thomascace0532017-07-07 13:02:29 +0000376 } else if (isa<CmpInst>(I) &&
377 containsGCPtrType(I.getOperand(0)->getType())) {
378 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
379 enum BaseType baseTyLHS = getBaseType(LHS),
380 baseTyRHS = getBaseType(RHS);
381
382 // Returns true if LHS and RHS are unrelocated pointers and they are
383 // valid unrelocated uses.
384 auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () {
385 // A cmp instruction has valid unrelocated pointer operands only if
386 // both operands are unrelocated pointers.
387 // In the comparison between two pointers, if one is an unrelocated
388 // use, the other *should be* an unrelocated use, for this
389 // instruction to contain valid unrelocated uses. This unrelocated
390 // use can be a null constant as well, or another unrelocated
391 // pointer.
392 if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
393 return false;
394 // Constant pointers (that are not exclusively null) may have
395 // meaning in different VMs, so we cannot reorder the compare
396 // against constant pointers before the safepoint. In other words,
397 // comparison of an unrelocated use against a non-null constant
398 // maybe invalid.
399 if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
400 baseTyRHS == BaseType::NonConstant) ||
401 (baseTyLHS == BaseType::NonConstant &&
402 baseTyRHS == BaseType::ExclusivelySomeConstant))
403 return false;
404 // All other cases are valid cases enumerated below:
405 // 1. Comparison between an exlusively derived null pointer and a
406 // constant base pointer.
407 // 2. Comparison between an exlusively derived null pointer and a
408 // non-constant unrelocated base pointer.
409 // 3. Comparison between 2 unrelocated pointers.
410 return true;
411 };
412 if (!hasValidUnrelocatedUse()) {
413 // Print out all non-constant derived pointers that are unrelocated
414 // uses, which are invalid.
415 if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
416 ReportInvalidUse(*LHS, I);
417 if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
418 ReportInvalidUse(*RHS, I);
419 }
Anna Thomas740f5292017-07-05 01:16:29 +0000420 } else {
421 for (const Value *V : I.operands())
422 if (containsGCPtrType(V->getType()) &&
Anna Thomasccce8532017-07-07 00:40:37 +0000423 isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
Anna Thomas740f5292017-07-05 01:16:29 +0000424 ReportInvalidUse(*V, I);
425 }
426
427 bool Cleared = false;
428 TransferInstruction(I, Cleared, AvailableSet);
429 (void)Cleared;
430 }
431 }
432
433 if (PrintOnly && !AnyInvalidUses) {
434 dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
435 << "\n";
436 }
437}