Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 1 | //===-- ThreadSanitizer.cpp - race detector -------------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file is a part of ThreadSanitizer, a race detector. |
| 11 | // |
| 12 | // The tool is under development, for the details about previous versions see |
| 13 | // http://code.google.com/p/data-race-test |
| 14 | // |
| 15 | // The instrumentation phase is quite simple: |
| 16 | // - Insert calls to run-time library before every memory access. |
| 17 | // - Optimizations may apply to avoid instrumenting some of the accesses. |
| 18 | // - Insert calls at function entry/exit. |
| 19 | // The rest is handled by the run-time library. |
| 20 | //===----------------------------------------------------------------------===// |
| 21 | |
| 22 | #define DEBUG_TYPE "tsan" |
| 23 | |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 24 | #include "FunctionBlackList.h" |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 25 | #include "llvm/ADT/SmallSet.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 26 | #include "llvm/ADT/SmallString.h" |
| 27 | #include "llvm/ADT/SmallVector.h" |
| 28 | #include "llvm/ADT/StringExtras.h" |
| 29 | #include "llvm/Intrinsics.h" |
| 30 | #include "llvm/Function.h" |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 31 | #include "llvm/LLVMContext.h" |
| 32 | #include "llvm/Metadata.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 33 | #include "llvm/Module.h" |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 34 | #include "llvm/Support/CommandLine.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 35 | #include "llvm/Support/Debug.h" |
| 36 | #include "llvm/Support/IRBuilder.h" |
| 37 | #include "llvm/Support/MathExtras.h" |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 38 | #include "llvm/Support/raw_ostream.h" |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 39 | #include "llvm/Target/TargetData.h" |
| 40 | #include "llvm/Transforms/Instrumentation.h" |
| 41 | #include "llvm/Transforms/Utils/ModuleUtils.h" |
| 42 | #include "llvm/Type.h" |
| 43 | |
| 44 | using namespace llvm; |
| 45 | |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 46 | static cl::opt<std::string> ClBlackListFile("tsan-blacklist", |
| 47 | cl::desc("Blacklist file"), cl::Hidden); |
| 48 | |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 49 | static cl::opt<bool> ClPrintStats("tsan-print-stats", |
| 50 | cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden); |
| 51 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 52 | namespace { |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 53 | |
// Counters describing what the instrumentation did (or skipped) in a module.
// The pass zeroes them with memset and prints them on request.
struct ThreadSanitizerStats {
  // Loads that received a __tsan_read* callback.
  size_t NumInstrumentedReads;
  // Stores that received a __tsan_write* callback.
  size_t NumInstrumentedWrites;
  // Loads skipped because a later store in the same call-free run of
  // instructions targets the same address.
  size_t NumOmittedReadsBeforeWrite;
  // Accesses skipped because their store size is not 8/16/32/64/128 bits.
  size_t NumAccessesWithBadSize;
  // Stores tagged as vtable-pointer accesses (reported via
  // __tsan_vptr_update).
  size_t NumInstrumentedVtableWrites;
  // Loads skipped because the address is based on a constant global.
  size_t NumOmittedReadsFromConstantGlobals;
  // Loads skipped because the address was itself loaded from a vtable pointer.
  size_t NumOmittedReadsFromVtable;
};
| 64 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 65 | /// ThreadSanitizer: instrument the code in module to find races. |
| 66 | struct ThreadSanitizer : public FunctionPass { |
| 67 | ThreadSanitizer(); |
| 68 | bool runOnFunction(Function &F); |
| 69 | bool doInitialization(Module &M); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 70 | bool doFinalization(Module &M); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 71 | bool instrumentLoadOrStore(Instruction *I); |
| 72 | static char ID; // Pass identification, replacement for typeid. |
| 73 | |
| 74 | private: |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 75 | void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, |
| 76 | SmallVectorImpl<Instruction*> &All); |
Kostya Serebryany | cff60c1 | 2012-04-10 22:29:17 +0000 | [diff] [blame^] | 77 | bool addrPointsToConstantData(Value *Addr); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 78 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 79 | TargetData *TD; |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 80 | OwningPtr<FunctionBlackList> BL; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 81 | // Callbacks to run-time library are computed in doInitialization. |
| 82 | Value *TsanFuncEntry; |
| 83 | Value *TsanFuncExit; |
| 84 | // Accesses sizes are powers of two: 1, 2, 4, 8, 16. |
Kostya Serebryany | 3eccaa6 | 2012-02-14 00:52:07 +0000 | [diff] [blame] | 85 | static const size_t kNumberOfAccessSizes = 5; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 86 | Value *TsanRead[kNumberOfAccessSizes]; |
| 87 | Value *TsanWrite[kNumberOfAccessSizes]; |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 88 | Value *TsanVptrUpdate; |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 89 | |
| 90 | // Stats are modified w/o synchronization. |
| 91 | ThreadSanitizerStats stats; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 92 | }; |
| 93 | } // namespace |
| 94 | |
| 95 | char ThreadSanitizer::ID = 0; |
| 96 | INITIALIZE_PASS(ThreadSanitizer, "tsan", |
| 97 | "ThreadSanitizer: detects data races.", |
| 98 | false, false) |
| 99 | |
| 100 | ThreadSanitizer::ThreadSanitizer() |
| 101 | : FunctionPass(ID), |
| 102 | TD(NULL) { |
| 103 | } |
| 104 | |
| 105 | FunctionPass *llvm::createThreadSanitizerPass() { |
| 106 | return new ThreadSanitizer(); |
| 107 | } |
| 108 | |
| 109 | bool ThreadSanitizer::doInitialization(Module &M) { |
| 110 | TD = getAnalysisIfAvailable<TargetData>(); |
| 111 | if (!TD) |
| 112 | return false; |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 113 | BL.reset(new FunctionBlackList(ClBlackListFile)); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 114 | memset(&stats, 0, sizeof(stats)); |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 115 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 116 | // Always insert a call to __tsan_init into the module's CTORs. |
| 117 | IRBuilder<> IRB(M.getContext()); |
| 118 | Value *TsanInit = M.getOrInsertFunction("__tsan_init", |
| 119 | IRB.getVoidTy(), NULL); |
| 120 | appendToGlobalCtors(M, cast<Function>(TsanInit), 0); |
| 121 | |
| 122 | // Initialize the callbacks. |
| 123 | TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(), |
| 124 | IRB.getInt8PtrTy(), NULL); |
| 125 | TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), |
| 126 | NULL); |
Kostya Serebryany | 3eccaa6 | 2012-02-14 00:52:07 +0000 | [diff] [blame] | 127 | for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 128 | SmallString<32> ReadName("__tsan_read"); |
| 129 | ReadName += itostr(1 << i); |
| 130 | TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(), |
| 131 | IRB.getInt8PtrTy(), NULL); |
| 132 | SmallString<32> WriteName("__tsan_write"); |
| 133 | WriteName += itostr(1 << i); |
| 134 | TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(), |
| 135 | IRB.getInt8PtrTy(), NULL); |
| 136 | } |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 137 | TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), |
| 138 | IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), |
| 139 | NULL); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 140 | return true; |
| 141 | } |
| 142 | |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 143 | bool ThreadSanitizer::doFinalization(Module &M) { |
| 144 | if (ClPrintStats) { |
| 145 | errs() << "ThreadSanitizerStats " << M.getModuleIdentifier() |
| 146 | << ": wr " << stats.NumInstrumentedWrites |
| 147 | << "; rd " << stats.NumInstrumentedReads |
| 148 | << "; vt " << stats.NumInstrumentedVtableWrites |
| 149 | << "; bs " << stats.NumAccessesWithBadSize |
| 150 | << "; rbw " << stats.NumOmittedReadsBeforeWrite |
Kostya Serebryany | cff60c1 | 2012-04-10 22:29:17 +0000 | [diff] [blame^] | 151 | << "; rcg " << stats.NumOmittedReadsFromConstantGlobals |
| 152 | << "; rvt " << stats.NumOmittedReadsFromVtable |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 153 | << "\n"; |
| 154 | } |
| 155 | return true; |
| 156 | } |
| 157 | |
Kostya Serebryany | cff60c1 | 2012-04-10 22:29:17 +0000 | [diff] [blame^] | 158 | static bool isVtableAccess(Instruction *I) { |
| 159 | if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { |
| 160 | if (Tag->getNumOperands() < 1) return false; |
| 161 | if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { |
| 162 | if (Tag1->getString() == "vtable pointer") return true; |
| 163 | } |
| 164 | } |
| 165 | return false; |
| 166 | } |
| 167 | |
| 168 | bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { |
| 169 | // If this is a GEP, just analyze its pointer operand. |
| 170 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) |
| 171 | Addr = GEP->getPointerOperand(); |
| 172 | |
| 173 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { |
| 174 | if (GV->isConstant()) { |
| 175 | // Reads from constant globals can not race with any writes. |
| 176 | stats.NumOmittedReadsFromConstantGlobals++; |
| 177 | return true; |
| 178 | } |
| 179 | } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { |
| 180 | if (isVtableAccess(L)) { |
| 181 | // Reads from a vtable pointer can not race with any writes. |
| 182 | stats.NumOmittedReadsFromVtable++; |
| 183 | return true; |
| 184 | } |
| 185 | } |
| 186 | return false; |
| 187 | } |
| 188 | |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 189 | // Instrumenting some of the accesses may be proven redundant. |
| 190 | // Currently handled: |
| 191 | // - read-before-write (within same BB, no calls between) |
| 192 | // |
| 193 | // We do not handle some of the patterns that should not survive |
| 194 | // after the classic compiler optimizations. |
| 195 | // E.g. two reads from the same temp should be eliminated by CSE, |
| 196 | // two writes should be eliminated by DSE, etc. |
| 197 | // |
| 198 | // 'Local' is a vector of insns within the same BB (no calls between). |
| 199 | // 'All' is a vector of insns that will be instrumented. |
| 200 | void ThreadSanitizer::choseInstructionsToInstrument( |
| 201 | SmallVectorImpl<Instruction*> &Local, |
| 202 | SmallVectorImpl<Instruction*> &All) { |
| 203 | SmallSet<Value*, 8> WriteTargets; |
| 204 | // Iterate from the end. |
| 205 | for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), |
| 206 | E = Local.rend(); It != E; ++It) { |
| 207 | Instruction *I = *It; |
| 208 | if (StoreInst *Store = dyn_cast<StoreInst>(I)) { |
| 209 | WriteTargets.insert(Store->getPointerOperand()); |
| 210 | } else { |
| 211 | LoadInst *Load = cast<LoadInst>(I); |
Kostya Serebryany | cff60c1 | 2012-04-10 22:29:17 +0000 | [diff] [blame^] | 212 | Value *Addr = Load->getPointerOperand(); |
| 213 | if (WriteTargets.count(Addr)) { |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 214 | // We will write to this temp, so no reason to analyze the read. |
| 215 | stats.NumOmittedReadsBeforeWrite++; |
| 216 | continue; |
| 217 | } |
Kostya Serebryany | cff60c1 | 2012-04-10 22:29:17 +0000 | [diff] [blame^] | 218 | if (addrPointsToConstantData(Addr)) { |
| 219 | // Addr points to some constant data -- it can not race with any writes. |
| 220 | continue; |
| 221 | } |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 222 | } |
| 223 | All.push_back(I); |
| 224 | } |
| 225 | Local.clear(); |
| 226 | } |
| 227 | |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 228 | bool ThreadSanitizer::runOnFunction(Function &F) { |
| 229 | if (!TD) return false; |
Kostya Serebryany | 6e590e3 | 2012-03-14 23:33:24 +0000 | [diff] [blame] | 230 | if (BL->isIn(F)) return false; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 231 | SmallVector<Instruction*, 8> RetVec; |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 232 | SmallVector<Instruction*, 8> AllLoadsAndStores; |
| 233 | SmallVector<Instruction*, 8> LocalLoadsAndStores; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 234 | bool Res = false; |
| 235 | bool HasCalls = false; |
| 236 | |
| 237 | // Traverse all instructions, collect loads/stores/returns, check for calls. |
| 238 | for (Function::iterator FI = F.begin(), FE = F.end(); |
| 239 | FI != FE; ++FI) { |
| 240 | BasicBlock &BB = *FI; |
| 241 | for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); |
| 242 | BI != BE; ++BI) { |
| 243 | if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 244 | LocalLoadsAndStores.push_back(BI); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 245 | else if (isa<ReturnInst>(BI)) |
| 246 | RetVec.push_back(BI); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 247 | else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 248 | HasCalls = true; |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 249 | choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); |
| 250 | } |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 251 | } |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 252 | choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 253 | } |
| 254 | |
| 255 | // We have collected all loads and stores. |
| 256 | // FIXME: many of these accesses do not need to be checked for races |
| 257 | // (e.g. variables that do not escape, etc). |
| 258 | |
| 259 | // Instrument memory accesses. |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 260 | for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { |
| 261 | Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 262 | } |
| 263 | |
| 264 | // Instrument function entry/exit points if there were instrumented accesses. |
| 265 | if (Res || HasCalls) { |
| 266 | IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); |
| 267 | Value *ReturnAddress = IRB.CreateCall( |
| 268 | Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), |
| 269 | IRB.getInt32(0)); |
| 270 | IRB.CreateCall(TsanFuncEntry, ReturnAddress); |
| 271 | for (size_t i = 0, n = RetVec.size(); i < n; ++i) { |
| 272 | IRBuilder<> IRBRet(RetVec[i]); |
| 273 | IRBRet.CreateCall(TsanFuncExit); |
| 274 | } |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 275 | Res = true; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 276 | } |
| 277 | return Res; |
| 278 | } |
| 279 | |
| 280 | bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { |
| 281 | IRBuilder<> IRB(I); |
| 282 | bool IsWrite = isa<StoreInst>(*I); |
| 283 | Value *Addr = IsWrite |
| 284 | ? cast<StoreInst>(I)->getPointerOperand() |
| 285 | : cast<LoadInst>(I)->getPointerOperand(); |
| 286 | Type *OrigPtrTy = Addr->getType(); |
| 287 | Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); |
| 288 | assert(OrigTy->isSized()); |
| 289 | uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); |
| 290 | if (TypeSize != 8 && TypeSize != 16 && |
| 291 | TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 292 | stats.NumAccessesWithBadSize++; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 293 | // Ignore all unusual sizes. |
| 294 | return false; |
| 295 | } |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 296 | if (IsWrite && isVtableAccess(I)) { |
| 297 | Value *StoredValue = cast<StoreInst>(I)->getValueOperand(); |
| 298 | IRB.CreateCall2(TsanVptrUpdate, |
| 299 | IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), |
| 300 | IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 301 | stats.NumInstrumentedVtableWrites++; |
Kostya Serebryany | 52eb6992 | 2012-03-26 17:35:03 +0000 | [diff] [blame] | 302 | return true; |
| 303 | } |
Kostya Serebryany | 3eccaa6 | 2012-02-14 00:52:07 +0000 | [diff] [blame] | 304 | size_t Idx = CountTrailingZeros_32(TypeSize / 8); |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 305 | assert(Idx < kNumberOfAccessSizes); |
| 306 | Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; |
| 307 | IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); |
Kostya Serebryany | 2076af0 | 2012-04-10 18:18:56 +0000 | [diff] [blame] | 308 | if (IsWrite) stats.NumInstrumentedWrites++; |
| 309 | else stats.NumInstrumentedReads++; |
Kostya Serebryany | 60ebb194 | 2012-02-13 22:50:51 +0000 | [diff] [blame] | 310 | return true; |
| 311 | } |