blob: 7b1a4170408a19670159420e3c86599b88beb384 [file] [log] [blame]
Nuno Lopes5c525b52012-05-22 17:19:09 +00001//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a pass that instruments the code to perform run-time
11// bounds checking on loads, stores, and other memory intrinsics.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "bounds-checking"
16#include "llvm/Transforms/Scalar.h"
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/MemoryBuiltins.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/InstIterator.h"
21#include "llvm/Support/IRBuilder.h"
Nuno Lopes2f0a7482012-05-22 22:02:19 +000022#include "llvm/Support/raw_ostream.h"
Nuno Lopes5c525b52012-05-22 17:19:09 +000023#include "llvm/Support/TargetFolder.h"
24#include "llvm/Target/TargetData.h"
25#include "llvm/Transforms/Utils/Local.h"
26#include "llvm/GlobalVariable.h"
27#include "llvm/Instructions.h"
28#include "llvm/Intrinsics.h"
Nuno Lopes6a06e682012-05-25 16:54:04 +000029#include "llvm/Metadata.h"
Nuno Lopes5c525b52012-05-22 17:19:09 +000030#include "llvm/Operator.h"
31#include "llvm/Pass.h"
32using namespace llvm;
33
34STATISTIC(ChecksAdded, "Bounds checks added");
35STATISTIC(ChecksSkipped, "Bounds checks skipped");
36STATISTIC(ChecksUnable, "Bounds checks unable to add");
37
38typedef IRBuilder<true, TargetFolder> BuilderTy;
39
40namespace {
41 enum ConstTriState {
42 NotConst, Const, Dunno
43 };
44
45 struct BoundsChecking : public FunctionPass {
Nuno Lopes5c525b52012-05-22 17:19:09 +000046 static char ID;
47
48 BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){
49 initializeBoundsCheckingPass(*PassRegistry::getPassRegistry());
50 }
51
Nuno Lopes5c525b52012-05-22 17:19:09 +000052 virtual bool runOnFunction(Function &F);
53
54 virtual void getAnalysisUsage(AnalysisUsage &AU) const {
55 AU.addRequired<TargetData>();
56 }
Nuno Lopes2f0a7482012-05-22 22:02:19 +000057
58 private:
59 const TargetData *TD;
60 BuilderTy *Builder;
61 Function *Fn;
62 BasicBlock *TrapBB;
63 unsigned Penalty;
64
65 BasicBlock *getTrapBB();
Nuno Lopes2b526302012-05-23 16:24:52 +000066 void emitBranchToTrap(Value *Cmp = 0);
Nuno Lopes2f0a7482012-05-22 22:02:19 +000067 ConstTriState computeAllocSize(Value *Alloc, uint64_t &Size,
68 Value* &SizeValue);
69 bool instrument(Value *Ptr, Value *Val);
Nuno Lopes5c525b52012-05-22 17:19:09 +000070 };
71}
72
73char BoundsChecking::ID = 0;
74INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
75 false, false)
76
77
78/// getTrapBB - create a basic block that traps. All overflowing conditions
79/// branch to this block. There's only one trap block per function.
80BasicBlock *BoundsChecking::getTrapBB() {
81 if (TrapBB)
82 return TrapBB;
83
84 BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint();
85 TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
86 Builder->SetInsertPoint(TrapBB);
87
88 llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
89 CallInst *TrapCall = Builder->CreateCall(F);
90 TrapCall->setDoesNotReturn();
91 TrapCall->setDoesNotThrow();
92 Builder->CreateUnreachable();
93
94 Builder->SetInsertPoint(PrevInsertPoint);
95 return TrapBB;
96}
97
98
Nuno Lopes2b526302012-05-23 16:24:52 +000099/// emitBranchToTrap - emit a branch instruction to a trap block.
100/// If Cmp is non-null, perform a jump only if its value evaluates to true.
101void BoundsChecking::emitBranchToTrap(Value *Cmp) {
102 Instruction *Inst = Builder->GetInsertPoint();
103 BasicBlock *OldBB = Inst->getParent();
104 BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
105 OldBB->getTerminator()->eraseFromParent();
106
Nuno Lopes2b526302012-05-23 16:24:52 +0000107 if (Cmp)
108 BranchInst::Create(getTrapBB(), Cont, Cmp, OldBB);
109 else
110 BranchInst::Create(getTrapBB(), OldBB);
111}
112
113
Nuno Lopes5c525b52012-05-22 17:19:09 +0000114/// computeAllocSize - compute the object size allocated by an allocation
115/// site. Returns NotConst if the size is not constant (in SizeValue), Const if
116/// the size is constant (in Size), and Dunno if the size could not be
117/// determined within the given maximum Penalty that the computation would
118/// incurr at run-time.
119ConstTriState BoundsChecking::computeAllocSize(Value *Alloc, uint64_t &Size,
120 Value* &SizeValue) {
Nuno Lopes6a06e682012-05-25 16:54:04 +0000121 IntegerType *RetTy = TD->getIntPtrType(Fn->getContext());
122
123 // global variable with definitive size
Nuno Lopes5c525b52012-05-22 17:19:09 +0000124 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Alloc)) {
125 if (GV->hasDefinitiveInitializer()) {
126 Constant *C = GV->getInitializer();
127 Size = TD->getTypeAllocSize(C->getType());
128 return Const;
129 }
130 return Dunno;
131
Nuno Lopes6a06e682012-05-25 16:54:04 +0000132 // stack allocation
Nuno Lopes5c525b52012-05-22 17:19:09 +0000133 } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Alloc)) {
134 if (!AI->getAllocatedType()->isSized())
135 return Dunno;
136
137 Size = TD->getTypeAllocSize(AI->getAllocatedType());
138 if (!AI->isArrayAllocation())
139 return Const; // we are done
140
141 Value *ArraySize = AI->getArraySize();
142 if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
143 Size *= C->getZExtValue();
144 return Const;
145 }
146
147 if (Penalty < 2)
148 return Dunno;
149
150 SizeValue = ConstantInt::get(ArraySize->getType(), Size);
151 SizeValue = Builder->CreateMul(SizeValue, ArraySize);
152 return NotConst;
153
Nuno Lopesd72d67d2012-05-25 21:15:17 +0000154 // function arguments
155 } else if (Argument *A = dyn_cast<Argument>(Alloc)) {
156 if (!A->hasByValAttr())
157 return Dunno;
158
159 PointerType *PT = cast<PointerType>(A->getType());
160 Size = TD->getTypeAllocSize(PT->getElementType());
161 return Const;
162
Nuno Lopes6a06e682012-05-25 16:54:04 +0000163 // ptr = select(ptr1, ptr2)
164 } else if (SelectInst *SI = dyn_cast<SelectInst>(Alloc)) {
165 uint64_t SizeFalse;
166 Value *SizeValueFalse;
167 ConstTriState TrueConst = computeAllocSize(SI->getTrueValue(), Size,
168 SizeValue);
169 ConstTriState FalseConst = computeAllocSize(SI->getFalseValue(), SizeFalse,
170 SizeValueFalse);
171
172 if (TrueConst == Const && FalseConst == Const && Size == SizeFalse)
173 return Const;
174
175 if (Penalty < 2 || (TrueConst == Dunno && FalseConst == Dunno))
176 return Dunno;
177
178 // if one of the branches is Dunno, assume it is ok and check just the other
179 APInt MaxSize = APInt::getMaxValue(TD->getTypeSizeInBits(RetTy));
180
181 if (TrueConst == Const)
182 SizeValue = ConstantInt::get(RetTy, Size);
183 else if (TrueConst == Dunno)
184 SizeValue = ConstantInt::get(RetTy, MaxSize);
185
186 if (FalseConst == Const)
187 SizeValueFalse = ConstantInt::get(RetTy, SizeFalse);
188 else if (FalseConst == Dunno)
189 SizeValueFalse = ConstantInt::get(RetTy, MaxSize);
190
191 SizeValue = Builder->CreateSelect(SI->getCondition(), SizeValue,
192 SizeValueFalse);
193 return NotConst;
194
195 // call allocation function
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000196 } else if (CallInst *CI = dyn_cast<CallInst>(Alloc)) {
Nuno Lopes6a06e682012-05-25 16:54:04 +0000197 SmallVector<unsigned, 4> Args;
Nuno Lopes5c525b52012-05-22 17:19:09 +0000198
Nuno Lopes6a06e682012-05-25 16:54:04 +0000199 if (MDNode *MD = CI->getMetadata("alloc_size")) {
200 for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
201 Args.push_back(cast<ConstantInt>(MD->getOperand(i))->getZExtValue());
202
203 } else if (Function *Callee = CI->getCalledFunction()) {
204 FunctionType *FTy = Callee->getFunctionType();
205
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000206 // alloc(size)
Nuno Lopes6a06e682012-05-25 16:54:04 +0000207 if (FTy->getNumParams() == 1 && FTy->getParamType(0)->isIntegerTy()) {
208 if ((Callee->getName() == "malloc" ||
209 Callee->getName() == "valloc" ||
210 Callee->getName() == "_Znwj" || // operator new(unsigned int)
211 Callee->getName() == "_Znwm" || // operator new(unsigned long)
212 Callee->getName() == "_Znaj" || // operator new[](unsigned int)
213 Callee->getName() == "_Znam")) {
214 Args.push_back(0);
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000215 }
Nuno Lopes6a06e682012-05-25 16:54:04 +0000216 } else if (FTy->getNumParams() == 2) {
217 // alloc(_, x)
218 if (FTy->getParamType(1)->isIntegerTy() &&
219 ((Callee->getName() == "realloc" ||
220 Callee->getName() == "reallocf"))) {
221 Args.push_back(1);
222
223 // alloc(x, y)
224 } else if (FTy->getParamType(0)->isIntegerTy() &&
225 FTy->getParamType(1)->isIntegerTy() &&
226 Callee->getName() == "calloc") {
227 Args.push_back(0);
228 Args.push_back(1);
229 }
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000230 }
Nuno Lopes6a06e682012-05-25 16:54:04 +0000231 }
232
233 if (Args.empty())
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000234 return Dunno;
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000235
Nuno Lopes6a06e682012-05-25 16:54:04 +0000236 // check if all arguments are constant. if so, the object size is also const
237 bool AllConst = true;
238 for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end();
239 I != E; ++I) {
240 if (!isa<ConstantInt>(CI->getArgOperand(*I))) {
241 AllConst = false;
242 break;
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000243 }
244 }
Nuno Lopes6a06e682012-05-25 16:54:04 +0000245
246 if (AllConst) {
247 Size = 1;
248 for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end();
249 I != E; ++I) {
250 ConstantInt *Arg = cast<ConstantInt>(CI->getArgOperand(*I));
251 Size *= (size_t)Arg->getZExtValue();
252 }
253 return Const;
254 }
255
256 if (Penalty < 2)
257 return Dunno;
258
259 // not all arguments are constant, so create a sequence of multiplications
260 bool First = true;
261 for (SmallVectorImpl<unsigned>::iterator I = Args.begin(), E = Args.end();
262 I != E; ++I) {
263 Value *Arg = CI->getArgOperand(*I);
264 if (First) {
265 SizeValue = Arg;
266 First = false;
267 continue;
268 }
269 SizeValue = Builder->CreateMul(SizeValue, Arg);
270 }
271
272 return NotConst;
273
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000274 // TODO: handle more standard functions:
275 // - strdup / strndup
276 // - strcpy / strncpy
277 // - memcpy / memmove
278 // - strcat / strncat
Nuno Lopes5c525b52012-05-22 17:19:09 +0000279 }
280
Nuno Lopes6a06e682012-05-25 16:54:04 +0000281 DEBUG(dbgs() << "computeAllocSize failed:\n" << *Alloc << "\n");
Nuno Lopes5c525b52012-05-22 17:19:09 +0000282 return Dunno;
283}
284
285
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000286/// instrument - adds run-time bounds checks to memory accessing instructions.
287/// Ptr is the pointer that will be read/written, and InstVal is either the
288/// result from the load or the value being stored. It is used to determine the
289/// size of memory block that is touched.
290/// Returns true if any change was made to the IR, false otherwise.
Nuno Lopes5c525b52012-05-22 17:19:09 +0000291bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
292 uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType());
293 DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
294 << " bytes\n");
295
296 Type *SizeTy = Type::getInt64Ty(Fn->getContext());
297
298 // Get to the real allocated thing and offset as fast as possible.
299 Ptr = Ptr->stripPointerCasts();
300 GEPOperator *GEP;
301
302 if ((GEP = dyn_cast<GEPOperator>(Ptr))) {
303 // check if we will be able to get the offset
304 if (!GEP->hasAllConstantIndices() && Penalty < 2) {
305 ++ChecksUnable;
306 return false;
307 }
308 Ptr = GEP->getPointerOperand()->stripPointerCasts();
309 }
310
311 uint64_t Size = 0;
312 Value *SizeValue = 0;
313 ConstTriState ConstAlloc = computeAllocSize(Ptr, Size, SizeValue);
314 if (ConstAlloc == Dunno) {
315 ++ChecksUnable;
316 return false;
317 }
318 assert(ConstAlloc == Const || SizeValue);
319
320 uint64_t Offset = 0;
321 Value *OffsetValue = 0;
322
323 if (GEP) {
324 if (GEP->hasAllConstantIndices()) {
325 SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
326 assert(GEP->getPointerOperandType()->isPointerTy());
327 Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
328 } else {
329 OffsetValue = EmitGEPOffset(Builder, *TD, GEP);
330 }
331 }
332
333 if (!OffsetValue && ConstAlloc == Const) {
334 if (Size < Offset || (Size - Offset) < NeededSize) {
335 // Out of bounds
Nuno Lopes2b526302012-05-23 16:24:52 +0000336 emitBranchToTrap();
Nuno Lopes5c525b52012-05-22 17:19:09 +0000337 ++ChecksAdded;
338 return true;
339 }
340 // in bounds
341 ++ChecksSkipped;
342 return false;
343 }
344
345 if (OffsetValue)
346 OffsetValue = Builder->CreateZExt(OffsetValue, SizeTy);
347 else
348 OffsetValue = ConstantInt::get(SizeTy, Offset);
349
350 if (SizeValue)
351 SizeValue = Builder->CreateZExt(SizeValue, SizeTy);
352 else
353 SizeValue = ConstantInt::get(SizeTy, Size);
354
355 Value *NeededSizeVal = ConstantInt::get(SizeTy, NeededSize);
356 Value *ObjSize = Builder->CreateSub(SizeValue, OffsetValue);
357 Value *Cmp1 = Builder->CreateICmpULT(SizeValue, OffsetValue);
358 Value *Cmp2 = Builder->CreateICmpULT(ObjSize, NeededSizeVal);
359 Value *Or = Builder->CreateOr(Cmp1, Cmp2);
Nuno Lopes2b526302012-05-23 16:24:52 +0000360 emitBranchToTrap(Or);
Nuno Lopes5c525b52012-05-22 17:19:09 +0000361
Nuno Lopes5c525b52012-05-22 17:19:09 +0000362 ++ChecksAdded;
363 return true;
364}
365
366bool BoundsChecking::runOnFunction(Function &F) {
367 TD = &getAnalysis<TargetData>();
368
369 TrapBB = 0;
370 Fn = &F;
371 BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
372 Builder = &TheBuilder;
373
374 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
375 // touching instructions
376 std::vector<Instruction*> WorkList;
377 for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
378 Instruction *I = &*i;
379 if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
380 isa<AtomicRMWInst>(I))
381 WorkList.push_back(I);
382 }
383
384 bool MadeChange = false;
Nuno Lopes2f0a7482012-05-22 22:02:19 +0000385 for (std::vector<Instruction*>::iterator i = WorkList.begin(),
386 e = WorkList.end(); i != e; ++i) {
387 Instruction *I = *i;
Nuno Lopes5c525b52012-05-22 17:19:09 +0000388
389 Builder->SetInsertPoint(I);
390 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
391 MadeChange |= instrument(LI->getPointerOperand(), LI);
392 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
393 MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand());
394 } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
395 MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand());
396 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(I)) {
397 MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand());
398 } else {
399 llvm_unreachable("unknown Instruction type");
400 }
401 }
402 return MadeChange;
403}
404
405FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) {
406 return new BoundsChecking(Penalty);
407}