Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 1 | //===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements a pass that instruments the code to perform run-time |
| 11 | // bounds checking on loads, stores, and other memory intrinsics. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #define DEBUG_TYPE "bounds-checking" |
| 16 | #include "llvm/Transforms/Scalar.h" |
| 17 | #include "llvm/ADT/Statistic.h" |
| 18 | #include "llvm/Analysis/MemoryBuiltins.h" |
| 19 | #include "llvm/Support/Debug.h" |
| 20 | #include "llvm/Support/InstIterator.h" |
| 21 | #include "llvm/Support/IRBuilder.h" |
Nuno Lopes | 2f0a748 | 2012-05-22 22:02:19 +0000 | [diff] [blame] | 22 | #include "llvm/Support/raw_ostream.h" |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 23 | #include "llvm/Support/TargetFolder.h" |
| 24 | #include "llvm/Target/TargetData.h" |
| 25 | #include "llvm/Transforms/Utils/Local.h" |
| 26 | #include "llvm/GlobalVariable.h" |
| 27 | #include "llvm/Instructions.h" |
| 28 | #include "llvm/Intrinsics.h" |
| 29 | #include "llvm/Operator.h" |
| 30 | #include "llvm/Pass.h" |
| 31 | using namespace llvm; |
| 32 | |
| 33 | STATISTIC(ChecksAdded, "Bounds checks added"); |
| 34 | STATISTIC(ChecksSkipped, "Bounds checks skipped"); |
| 35 | STATISTIC(ChecksUnable, "Bounds checks unable to add"); |
| 36 | |
| 37 | typedef IRBuilder<true, TargetFolder> BuilderTy; |
| 38 | |
| 39 | namespace { |
| 40 | enum ConstTriState { |
| 41 | NotConst, Const, Dunno |
| 42 | }; |
| 43 | |
| 44 | struct BoundsChecking : public FunctionPass { |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 45 | static char ID; |
| 46 | |
| 47 | BoundsChecking(unsigned _Penalty = 5) : FunctionPass(ID), Penalty(_Penalty){ |
| 48 | initializeBoundsCheckingPass(*PassRegistry::getPassRegistry()); |
| 49 | } |
| 50 | |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 51 | virtual bool runOnFunction(Function &F); |
| 52 | |
| 53 | virtual void getAnalysisUsage(AnalysisUsage &AU) const { |
| 54 | AU.addRequired<TargetData>(); |
| 55 | } |
Nuno Lopes | 2f0a748 | 2012-05-22 22:02:19 +0000 | [diff] [blame] | 56 | |
| 57 | private: |
| 58 | const TargetData *TD; |
| 59 | BuilderTy *Builder; |
| 60 | Function *Fn; |
| 61 | BasicBlock *TrapBB; |
| 62 | unsigned Penalty; |
| 63 | |
| 64 | BasicBlock *getTrapBB(); |
| 65 | ConstTriState computeAllocSize(Value *Alloc, uint64_t &Size, |
| 66 | Value* &SizeValue); |
| 67 | bool instrument(Value *Ptr, Value *Val); |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 68 | }; |
| 69 | } |
| 70 | |
| 71 | char BoundsChecking::ID = 0; |
| 72 | INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking", |
| 73 | false, false) |
| 74 | |
| 75 | |
| 76 | /// getTrapBB - create a basic block that traps. All overflowing conditions |
| 77 | /// branch to this block. There's only one trap block per function. |
| 78 | BasicBlock *BoundsChecking::getTrapBB() { |
| 79 | if (TrapBB) |
| 80 | return TrapBB; |
| 81 | |
| 82 | BasicBlock::iterator PrevInsertPoint = Builder->GetInsertPoint(); |
| 83 | TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn); |
| 84 | Builder->SetInsertPoint(TrapBB); |
| 85 | |
| 86 | llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap); |
| 87 | CallInst *TrapCall = Builder->CreateCall(F); |
| 88 | TrapCall->setDoesNotReturn(); |
| 89 | TrapCall->setDoesNotThrow(); |
| 90 | Builder->CreateUnreachable(); |
| 91 | |
| 92 | Builder->SetInsertPoint(PrevInsertPoint); |
| 93 | return TrapBB; |
| 94 | } |
| 95 | |
| 96 | |
| 97 | /// computeAllocSize - compute the object size allocated by an allocation |
| 98 | /// site. Returns NotConst if the size is not constant (in SizeValue), Const if |
| 99 | /// the size is constant (in Size), and Dunno if the size could not be |
| 100 | /// determined within the given maximum Penalty that the computation would |
| 101 | /// incurr at run-time. |
| 102 | ConstTriState BoundsChecking::computeAllocSize(Value *Alloc, uint64_t &Size, |
| 103 | Value* &SizeValue) { |
| 104 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Alloc)) { |
| 105 | if (GV->hasDefinitiveInitializer()) { |
| 106 | Constant *C = GV->getInitializer(); |
| 107 | Size = TD->getTypeAllocSize(C->getType()); |
| 108 | return Const; |
| 109 | } |
| 110 | return Dunno; |
| 111 | |
| 112 | } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Alloc)) { |
| 113 | if (!AI->getAllocatedType()->isSized()) |
| 114 | return Dunno; |
| 115 | |
| 116 | Size = TD->getTypeAllocSize(AI->getAllocatedType()); |
| 117 | if (!AI->isArrayAllocation()) |
| 118 | return Const; // we are done |
| 119 | |
| 120 | Value *ArraySize = AI->getArraySize(); |
| 121 | if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) { |
| 122 | Size *= C->getZExtValue(); |
| 123 | return Const; |
| 124 | } |
| 125 | |
| 126 | if (Penalty < 2) |
| 127 | return Dunno; |
| 128 | |
| 129 | SizeValue = ConstantInt::get(ArraySize->getType(), Size); |
| 130 | SizeValue = Builder->CreateMul(SizeValue, ArraySize); |
| 131 | return NotConst; |
| 132 | |
Nuno Lopes | 2f0a748 | 2012-05-22 22:02:19 +0000 | [diff] [blame] | 133 | } else if (CallInst *CI = dyn_cast<CallInst>(Alloc)) { |
| 134 | Function *Callee = CI->getCalledFunction(); |
| 135 | if (!Callee || !Callee->isDeclaration()) |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 136 | return Dunno; |
| 137 | |
Nuno Lopes | 2f0a748 | 2012-05-22 22:02:19 +0000 | [diff] [blame] | 138 | FunctionType *FTy = Callee->getFunctionType(); |
| 139 | if (FTy->getNumParams() == 1) { |
| 140 | // alloc(size) |
| 141 | if ((FTy->getParamType(0)->isIntegerTy(32) || |
| 142 | FTy->getParamType(0)->isIntegerTy(64)) && |
| 143 | (Callee->getName() == "malloc" || |
| 144 | Callee->getName() == "valloc" || |
| 145 | Callee->getName() == "_Znwj" || // operator new(unsigned int) |
| 146 | Callee->getName() == "_Znwm" || // operator new(unsigned long) |
| 147 | Callee->getName() == "_Znaj" || // operator new[](unsigned int) |
| 148 | Callee->getName() == "_Znam")) { // operator new[](unsigned long) |
| 149 | SizeValue = CI->getArgOperand(0); |
| 150 | if (ConstantInt *Arg = dyn_cast<ConstantInt>(SizeValue)) { |
| 151 | Size = Arg->getZExtValue(); |
| 152 | return Const; |
| 153 | } |
| 154 | return Penalty >= 2 ? NotConst : Dunno; |
| 155 | } |
| 156 | return Dunno; |
| 157 | } |
| 158 | |
| 159 | if (FTy->getNumParams() == 2) { |
| 160 | // alloc(x, y) and return buffer of size x * y |
| 161 | if (((FTy->getParamType(0)->isIntegerTy(32) && |
| 162 | FTy->getParamType(1)->isIntegerTy(32)) || |
| 163 | (FTy->getParamType(0)->isIntegerTy(64) && |
| 164 | FTy->getParamType(1)->isIntegerTy(64))) && |
| 165 | Callee->getName() == "calloc") { |
| 166 | Value *Arg1 = CI->getArgOperand(0); |
| 167 | Value *Arg2 = CI->getArgOperand(1); |
| 168 | if (ConstantInt *CI1 = dyn_cast<ConstantInt>(Arg1)) { |
| 169 | if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Arg2)) { |
| 170 | Size = (CI1->getValue() * CI2->getValue()).getZExtValue(); |
| 171 | return Const; |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | if (Penalty < 2) |
| 176 | return Dunno; |
| 177 | |
| 178 | SizeValue = Builder->CreateMul(Arg1, Arg2); |
| 179 | return NotConst; |
| 180 | } |
| 181 | |
| 182 | // realloc(ptr, size) |
| 183 | if ((FTy->getParamType(1)->isIntegerTy(32) || |
| 184 | FTy->getParamType(1)->isIntegerTy(64)) && |
| 185 | (Callee->getName() == "realloc" || |
| 186 | Callee->getName() == "reallocf")) { |
| 187 | SizeValue = CI->getArgOperand(1); |
| 188 | if (ConstantInt *Arg = dyn_cast<ConstantInt>(SizeValue)) { |
| 189 | Size = Arg->getZExtValue(); |
| 190 | return Const; |
| 191 | } |
| 192 | return Penalty >= 2 ? NotConst : Dunno; |
| 193 | } |
| 194 | } |
| 195 | // TODO: handle more standard functions: |
| 196 | // - strdup / strndup |
| 197 | // - strcpy / strncpy |
| 198 | // - memcpy / memmove |
| 199 | // - strcat / strncat |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 200 | } |
| 201 | |
| 202 | DEBUG(dbgs() << "computeAllocSize failed:\n" << *Alloc); |
| 203 | return Dunno; |
| 204 | } |
| 205 | |
| 206 | |
Nuno Lopes | 2f0a748 | 2012-05-22 22:02:19 +0000 | [diff] [blame] | 207 | /// instrument - adds run-time bounds checks to memory accessing instructions. |
| 208 | /// Ptr is the pointer that will be read/written, and InstVal is either the |
| 209 | /// result from the load or the value being stored. It is used to determine the |
| 210 | /// size of memory block that is touched. |
| 211 | /// Returns true if any change was made to the IR, false otherwise. |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 212 | bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) { |
| 213 | uint64_t NeededSize = TD->getTypeStoreSize(InstVal->getType()); |
| 214 | DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) |
| 215 | << " bytes\n"); |
| 216 | |
| 217 | Type *SizeTy = Type::getInt64Ty(Fn->getContext()); |
| 218 | |
| 219 | // Get to the real allocated thing and offset as fast as possible. |
| 220 | Ptr = Ptr->stripPointerCasts(); |
| 221 | GEPOperator *GEP; |
| 222 | |
| 223 | if ((GEP = dyn_cast<GEPOperator>(Ptr))) { |
| 224 | // check if we will be able to get the offset |
| 225 | if (!GEP->hasAllConstantIndices() && Penalty < 2) { |
| 226 | ++ChecksUnable; |
| 227 | return false; |
| 228 | } |
| 229 | Ptr = GEP->getPointerOperand()->stripPointerCasts(); |
| 230 | } |
| 231 | |
| 232 | uint64_t Size = 0; |
| 233 | Value *SizeValue = 0; |
| 234 | ConstTriState ConstAlloc = computeAllocSize(Ptr, Size, SizeValue); |
| 235 | if (ConstAlloc == Dunno) { |
| 236 | ++ChecksUnable; |
| 237 | return false; |
| 238 | } |
| 239 | assert(ConstAlloc == Const || SizeValue); |
| 240 | |
| 241 | uint64_t Offset = 0; |
| 242 | Value *OffsetValue = 0; |
| 243 | |
| 244 | if (GEP) { |
| 245 | if (GEP->hasAllConstantIndices()) { |
| 246 | SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); |
| 247 | assert(GEP->getPointerOperandType()->isPointerTy()); |
| 248 | Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); |
| 249 | } else { |
| 250 | OffsetValue = EmitGEPOffset(Builder, *TD, GEP); |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | if (!OffsetValue && ConstAlloc == Const) { |
| 255 | if (Size < Offset || (Size - Offset) < NeededSize) { |
| 256 | // Out of bounds |
| 257 | Builder->CreateBr(getTrapBB()); |
| 258 | ++ChecksAdded; |
| 259 | return true; |
| 260 | } |
| 261 | // in bounds |
| 262 | ++ChecksSkipped; |
| 263 | return false; |
| 264 | } |
| 265 | |
| 266 | if (OffsetValue) |
| 267 | OffsetValue = Builder->CreateZExt(OffsetValue, SizeTy); |
| 268 | else |
| 269 | OffsetValue = ConstantInt::get(SizeTy, Offset); |
| 270 | |
| 271 | if (SizeValue) |
| 272 | SizeValue = Builder->CreateZExt(SizeValue, SizeTy); |
| 273 | else |
| 274 | SizeValue = ConstantInt::get(SizeTy, Size); |
| 275 | |
| 276 | Value *NeededSizeVal = ConstantInt::get(SizeTy, NeededSize); |
| 277 | Value *ObjSize = Builder->CreateSub(SizeValue, OffsetValue); |
| 278 | Value *Cmp1 = Builder->CreateICmpULT(SizeValue, OffsetValue); |
| 279 | Value *Cmp2 = Builder->CreateICmpULT(ObjSize, NeededSizeVal); |
| 280 | Value *Or = Builder->CreateOr(Cmp1, Cmp2); |
| 281 | |
| 282 | // FIXME: add unlikely branch taken metadata? |
| 283 | Instruction *Inst = Builder->GetInsertPoint(); |
| 284 | BasicBlock *OldBB = Inst->getParent(); |
| 285 | BasicBlock *Cont = OldBB->splitBasicBlock(Inst); |
| 286 | OldBB->getTerminator()->eraseFromParent(); |
| 287 | BranchInst::Create(getTrapBB(), Cont, Or, OldBB); |
| 288 | ++ChecksAdded; |
| 289 | return true; |
| 290 | } |
| 291 | |
| 292 | bool BoundsChecking::runOnFunction(Function &F) { |
| 293 | TD = &getAnalysis<TargetData>(); |
| 294 | |
| 295 | TrapBB = 0; |
| 296 | Fn = &F; |
| 297 | BuilderTy TheBuilder(F.getContext(), TargetFolder(TD)); |
| 298 | Builder = &TheBuilder; |
| 299 | |
| 300 | // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory |
| 301 | // touching instructions |
| 302 | std::vector<Instruction*> WorkList; |
| 303 | for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { |
| 304 | Instruction *I = &*i; |
| 305 | if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) || |
| 306 | isa<AtomicRMWInst>(I)) |
| 307 | WorkList.push_back(I); |
| 308 | } |
| 309 | |
| 310 | bool MadeChange = false; |
Nuno Lopes | 2f0a748 | 2012-05-22 22:02:19 +0000 | [diff] [blame] | 311 | for (std::vector<Instruction*>::iterator i = WorkList.begin(), |
| 312 | e = WorkList.end(); i != e; ++i) { |
| 313 | Instruction *I = *i; |
Nuno Lopes | 5c525b5 | 2012-05-22 17:19:09 +0000 | [diff] [blame] | 314 | |
| 315 | Builder->SetInsertPoint(I); |
| 316 | if (LoadInst *LI = dyn_cast<LoadInst>(I)) { |
| 317 | MadeChange |= instrument(LI->getPointerOperand(), LI); |
| 318 | } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { |
| 319 | MadeChange |= instrument(SI->getPointerOperand(), SI->getValueOperand()); |
| 320 | } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(I)) { |
| 321 | MadeChange |= instrument(AI->getPointerOperand(),AI->getCompareOperand()); |
| 322 | } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(I)) { |
| 323 | MadeChange |= instrument(AI->getPointerOperand(), AI->getValOperand()); |
| 324 | } else { |
| 325 | llvm_unreachable("unknown Instruction type"); |
| 326 | } |
| 327 | } |
| 328 | return MadeChange; |
| 329 | } |
| 330 | |
| 331 | FunctionPass *llvm::createBoundsCheckingPass(unsigned Penalty) { |
| 332 | return new BoundsChecking(Penalty); |
| 333 | } |