Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 1 | //===-- ThreadSanitizer.cpp - race detector -------------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file is a part of ThreadSanitizer, a race detector. |
| 11 | // |
| 12 | // The tool is under development, for the details about previous versions see |
| 13 | // http://code.google.com/p/data-race-test |
| 14 | // |
| 15 | // The instrumentation phase is quite simple: |
| 16 | // - Insert calls to run-time library before every memory access. |
| 17 | // - Optimizations may apply to avoid instrumenting some of the accesses. |
| 18 | // - Insert calls at function entry/exit. |
| 19 | // The rest is handled by the run-time library. |
| 20 | //===----------------------------------------------------------------------===// |
| 21 | |
| 22 | #define DEBUG_TYPE "tsan" |
| 23 | |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 24 | #include "FunctionBlackList.h" |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 25 | #include "llvm/ADT/SmallSet.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 26 | #include "llvm/ADT/SmallString.h" |
| 27 | #include "llvm/ADT/SmallVector.h" |
| 28 | #include "llvm/ADT/StringExtras.h" |
| 29 | #include "llvm/Intrinsics.h" |
| 30 | #include "llvm/Function.h" |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 31 | #include "llvm/LLVMContext.h" |
| 32 | #include "llvm/Metadata.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 33 | #include "llvm/Module.h" |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 34 | #include "llvm/Support/CommandLine.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 35 | #include "llvm/Support/Debug.h" |
| 36 | #include "llvm/Support/IRBuilder.h" |
| 37 | #include "llvm/Support/MathExtras.h" |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 38 | #include "llvm/Support/raw_ostream.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 39 | #include "llvm/Target/TargetData.h" |
| 40 | #include "llvm/Transforms/Instrumentation.h" |
| 41 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
| 42 | #include "llvm/Type.h" |
| 43 | |
| 44 | using namespace llvm; |
| 45 | |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 46 | static cl::opt<std::string> ClBlackListFile("tsan-blacklist", |
| 47 | cl::desc("Blacklist file"), cl::Hidden); |
| 48 | |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 49 | static cl::opt<bool> ClPrintStats("tsan-print-stats", |
| 50 | cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden); |
| 51 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 52 | namespace { |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 53 | |
// Counters describing what the instrumentation did.  The fields are listed
// in the same order in which doFinalization prints them.
struct ThreadSanitizerStats {
  // Stores that received a __tsan_write* callback.
  size_t NumInstrumentedWrites;
  // Loads that received a __tsan_read* callback.
  size_t NumInstrumentedReads;
  // Stores tagged as vtable pointer updates (__tsan_vptr_update callback).
  size_t NumInstrumentedVtableWrites;
  // Accesses skipped because their size is not 8, 16, 32, 64 or 128 bits.
  size_t NumAccessesWithBadSize;
  // Loads skipped because the same address is written later on.
  size_t NumOmittedReadsBeforeWrite;
};
| 62 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 63 | /// ThreadSanitizer: instrument the code in module to find races. |
| 64 | struct ThreadSanitizer : public FunctionPass { |
| 65 | ThreadSanitizer(); |
| 66 | bool runOnFunction(Function &F); |
| 67 | bool doInitialization(Module &M); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 68 | bool doFinalization(Module &M); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 69 | bool instrumentLoadOrStore(Instruction *I); |
| 70 | static char ID; // Pass identification, replacement for typeid. |
| 71 | |
| 72 | private: |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 73 | void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, |
| 74 | SmallVectorImpl<Instruction*> &All); |
| 75 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 76 | TargetData *TD; |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 77 | OwningPtr<FunctionBlackList> BL; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 78 | // Callbacks to run-time library are computed in doInitialization. |
| 79 | Value *TsanFuncEntry; |
| 80 | Value *TsanFuncExit; |
| 81 | // Accesses sizes are powers of two: 1, 2, 4, 8, 16. |
Kostya Serebryany | 3eccaa6 | 2012-02-14 00:52:07 +0000 | [diff] [blame] | 82 | static const size_t kNumberOfAccessSizes = 5; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 83 | Value *TsanRead[kNumberOfAccessSizes]; |
| 84 | Value *TsanWrite[kNumberOfAccessSizes]; |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 85 | Value *TsanVptrUpdate; |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 86 | |
| 87 | // Stats are modified w/o synchronization. |
| 88 | ThreadSanitizerStats stats; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 89 | }; |
| 90 | } // namespace |
| 91 | |
| 92 | char ThreadSanitizer::ID = 0; |
| 93 | INITIALIZE_PASS(ThreadSanitizer, "tsan", |
| 94 | "ThreadSanitizer: detects data races.", |
| 95 | false, false) |
| 96 | |
| 97 | ThreadSanitizer::ThreadSanitizer() |
| 98 | : FunctionPass(ID), |
| 99 | TD(NULL) { |
| 100 | } |
| 101 | |
| 102 | FunctionPass *llvm::createThreadSanitizerPass() { |
| 103 | return new ThreadSanitizer(); |
| 104 | } |
| 105 | |
| 106 | bool ThreadSanitizer::doInitialization(Module &M) { |
| 107 | TD = getAnalysisIfAvailable<TargetData>(); |
| 108 | if (!TD) |
| 109 | return false; |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 110 | BL.reset(new FunctionBlackList(ClBlackListFile)); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 111 | memset(&stats, 0, sizeof(stats)); |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 112 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 113 | // Always insert a call to __tsan_init into the module's CTORs. |
| 114 | IRBuilder<> IRB(M.getContext()); |
| 115 | Value *TsanInit = M.getOrInsertFunction("__tsan_init", |
| 116 | IRB.getVoidTy(), NULL); |
| 117 | appendToGlobalCtors(M, cast<Function>(TsanInit), 0); |
| 118 | |
| 119 | // Initialize the callbacks. |
| 120 | TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(), |
| 121 | IRB.getInt8PtrTy(), NULL); |
| 122 | TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), |
| 123 | NULL); |
Kostya Serebryany | 3eccaa6 | 2012-02-14 00:52:07 +0000 | [diff] [blame] | 124 | for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 125 | SmallString<32> ReadName("__tsan_read"); |
| 126 | ReadName += itostr(1 << i); |
| 127 | TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(), |
| 128 | IRB.getInt8PtrTy(), NULL); |
| 129 | SmallString<32> WriteName("__tsan_write"); |
| 130 | WriteName += itostr(1 << i); |
| 131 | TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(), |
| 132 | IRB.getInt8PtrTy(), NULL); |
| 133 | } |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 134 | TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), |
| 135 | IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), |
| 136 | NULL); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 137 | return true; |
| 138 | } |
| 139 | |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 140 | bool ThreadSanitizer::doFinalization(Module &M) { |
| 141 | if (ClPrintStats) { |
| 142 | errs() << "ThreadSanitizerStats " << M.getModuleIdentifier() |
| 143 | << ": wr " << stats.NumInstrumentedWrites |
| 144 | << "; rd " << stats.NumInstrumentedReads |
| 145 | << "; vt " << stats.NumInstrumentedVtableWrites |
| 146 | << "; bs " << stats.NumAccessesWithBadSize |
| 147 | << "; rbw " << stats.NumOmittedReadsBeforeWrite |
| 148 | << "\n"; |
| 149 | } |
| 150 | return true; |
| 151 | } |
| 152 | |
| 153 | // Instrumenting some of the accesses may be proven redundant. |
| 154 | // Currently handled: |
| 155 | // - read-before-write (within same BB, no calls between) |
| 156 | // |
| 157 | // We do not handle some of the patterns that should not survive |
| 158 | // after the classic compiler optimizations. |
| 159 | // E.g. two reads from the same temp should be eliminated by CSE, |
| 160 | // two writes should be eliminated by DSE, etc. |
| 161 | // |
| 162 | // 'Local' is a vector of insns within the same BB (no calls between). |
| 163 | // 'All' is a vector of insns that will be instrumented. |
| 164 | void ThreadSanitizer::choseInstructionsToInstrument( |
| 165 | SmallVectorImpl<Instruction*> &Local, |
| 166 | SmallVectorImpl<Instruction*> &All) { |
| 167 | SmallSet<Value*, 8> WriteTargets; |
| 168 | // Iterate from the end. |
| 169 | for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), |
| 170 | E = Local.rend(); It != E; ++It) { |
| 171 | Instruction *I = *It; |
| 172 | if (StoreInst *Store = dyn_cast<StoreInst>(I)) { |
| 173 | WriteTargets.insert(Store->getPointerOperand()); |
| 174 | } else { |
| 175 | LoadInst *Load = cast<LoadInst>(I); |
| 176 | if (WriteTargets.count(Load->getPointerOperand())) { |
| 177 | // We will write to this temp, so no reason to analyze the read. |
| 178 | stats.NumOmittedReadsBeforeWrite++; |
| 179 | continue; |
| 180 | } |
| 181 | } |
| 182 | All.push_back(I); |
| 183 | } |
| 184 | Local.clear(); |
| 185 | } |
| 186 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 187 | bool ThreadSanitizer::runOnFunction(Function &F) { |
| 188 | if (!TD) return false; |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 189 | if (BL->isIn(F)) return false; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 190 | SmallVector<Instruction*, 8> RetVec; |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 191 | SmallVector<Instruction*, 8> AllLoadsAndStores; |
| 192 | SmallVector<Instruction*, 8> LocalLoadsAndStores; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 193 | bool Res = false; |
| 194 | bool HasCalls = false; |
| 195 | |
| 196 | // Traverse all instructions, collect loads/stores/returns, check for calls. |
| 197 | for (Function::iterator FI = F.begin(), FE = F.end(); |
| 198 | FI != FE; ++FI) { |
| 199 | BasicBlock &BB = *FI; |
| 200 | for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); |
| 201 | BI != BE; ++BI) { |
| 202 | if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 203 | LocalLoadsAndStores.push_back(BI); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 204 | else if (isa<ReturnInst>(BI)) |
| 205 | RetVec.push_back(BI); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 206 | else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 207 | HasCalls = true; |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 208 | choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); |
| 209 | } |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 210 | } |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 211 | choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 212 | } |
| 213 | |
| 214 | // We have collected all loads and stores. |
| 215 | // FIXME: many of these accesses do not need to be checked for races |
| 216 | // (e.g. variables that do not escape, etc). |
| 217 | |
| 218 | // Instrument memory accesses. |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 219 | for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { |
| 220 | Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 221 | } |
| 222 | |
| 223 | // Instrument function entry/exit points if there were instrumented accesses. |
| 224 | if (Res || HasCalls) { |
| 225 | IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); |
| 226 | Value *ReturnAddress = IRB.CreateCall( |
| 227 | Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), |
| 228 | IRB.getInt32(0)); |
| 229 | IRB.CreateCall(TsanFuncEntry, ReturnAddress); |
| 230 | for (size_t i = 0, n = RetVec.size(); i < n; ++i) { |
| 231 | IRBuilder<> IRBRet(RetVec[i]); |
| 232 | IRBRet.CreateCall(TsanFuncExit); |
| 233 | } |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 234 | Res = true; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 235 | } |
| 236 | return Res; |
| 237 | } |
| 238 | |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 239 | static bool isVtableAccess(Instruction *I) { |
| 240 | if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { |
| 241 | if (Tag->getNumOperands() < 1) return false; |
| 242 | if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { |
| 243 | if (Tag1->getString() == "vtable pointer") return true; |
| 244 | } |
| 245 | } |
| 246 | return false; |
| 247 | } |
| 248 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 249 | bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { |
| 250 | IRBuilder<> IRB(I); |
| 251 | bool IsWrite = isa<StoreInst>(*I); |
| 252 | Value *Addr = IsWrite |
| 253 | ? cast<StoreInst>(I)->getPointerOperand() |
| 254 | : cast<LoadInst>(I)->getPointerOperand(); |
| 255 | Type *OrigPtrTy = Addr->getType(); |
| 256 | Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); |
| 257 | assert(OrigTy->isSized()); |
| 258 | uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); |
| 259 | if (TypeSize != 8 && TypeSize != 16 && |
| 260 | TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 261 | stats.NumAccessesWithBadSize++; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 262 | // Ignore all unusual sizes. |
| 263 | return false; |
| 264 | } |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 265 | if (IsWrite && isVtableAccess(I)) { |
| 266 | Value *StoredValue = cast<StoreInst>(I)->getValueOperand(); |
| 267 | IRB.CreateCall2(TsanVptrUpdate, |
| 268 | IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), |
| 269 | IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 270 | stats.NumInstrumentedVtableWrites++; |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 271 | return true; |
| 272 | } |
Kostya Serebryany | 3eccaa6 | 2012-02-14 00:52:07 +0000 | [diff] [blame] | 273 | size_t Idx = CountTrailingZeros_32(TypeSize / 8); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 274 | assert(Idx < kNumberOfAccessSizes); |
| 275 | Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; |
| 276 | IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame^] | 277 | if (IsWrite) stats.NumInstrumentedWrites++; |
| 278 | else stats.NumInstrumentedReads++; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 279 | return true; |
| 280 | } |