ThreadSanitizer, a race detector. First LLVM commit.
Clang patch (flags) will follow shortly.
The run-time library will also follow, but not immediately.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150423 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
new file mode 100644
index 0000000..ab88d1c
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -0,0 +1,169 @@
+//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer, a race detector.
+//
+// The tool is under development; for details about previous versions see
+// http://code.google.com/p/data-race-test
+//
+// The instrumentation phase is quite simple:
+//   - Insert calls to run-time library before every memory access.
+//      - Optimizations may apply to avoid instrumenting some of the accesses.
+//   - Insert calls at function entry/exit.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tsan"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+namespace {
+/// ThreadSanitizer: instruments the functions of a module to find data races.
+struct ThreadSanitizer : public FunctionPass {
+  static char ID;  // Pass identification, replacement for typeid.
+  ThreadSanitizer();
+  bool doInitialization(Module &M);
+  bool runOnFunction(Function &F);
+  bool instrumentLoadOrStore(Instruction *I);
+
+ private:
+  static const int kNumberOfAccessSizes = 5;  // 1, 2, 4, 8 and 16 bytes.
+  TargetData *TD;
+  // Callbacks into the run-time library; computed in doInitialization.
+  Value *TsanFuncEntry;
+  Value *TsanFuncExit;
+  // One read and one write callback per power-of-two access size.
+  Value *TsanRead[kNumberOfAccessSizes];
+  Value *TsanWrite[kNumberOfAccessSizes];
+};
+}  // namespace
+
+// Defines the pass identifier and registers the pass under the name "tsan".
+char ThreadSanitizer::ID = 0;
+INITIALIZE_PASS(ThreadSanitizer, "tsan", "ThreadSanitizer: detects data races.",
+                false, false)
+
+// Constructs the pass with no TargetData; doInitialization() looks it up and
+// computes the run-time callbacks.
+ThreadSanitizer::ThreadSanitizer() : FunctionPass(ID), TD(NULL) {
+}
+
+FunctionPass *llvm::createThreadSanitizerPass() {
+  return new ThreadSanitizer();  // Heap-allocates a fresh pass instance.
+}
+
+// Per-module setup: when TargetData is available, appends __tsan_init to the
+// global ctors and declares the run-time callbacks; returns true if so.
+bool ThreadSanitizer::doInitialization(Module &M) {
+  TD = getAnalysisIfAvailable<TargetData>();
+  if (TD == NULL)
+    return false;
+  IRBuilder<> Builder(M.getContext());
+  Type *VoidTy = Builder.getVoidTy();
+  Type *BytePtrTy = Builder.getInt8PtrTy();
+
+  // Insert a call to __tsan_init into the module's CTORs.
+  Value *InitFn = M.getOrInsertFunction("__tsan_init", VoidTy, NULL);
+  appendToGlobalCtors(M, cast<Function>(InitFn), 0);
+
+  // Function entry/exit hooks, then one read and one write hook per size.
+  TsanFuncEntry =
+      M.getOrInsertFunction("__tsan_func_entry", VoidTy, BytePtrTy, NULL);
+  TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", VoidTy, NULL);
+  for (int Log = 0; Log != kNumberOfAccessSizes; ++Log) {
+    SmallString<32> ReadName("__tsan_read"), WriteName("__tsan_write");
+    ReadName += itostr(1 << Log);
+    WriteName += itostr(1 << Log);
+    TsanRead[Log] = M.getOrInsertFunction(ReadName, VoidTy, BytePtrTy, NULL);
+    TsanWrite[Log] = M.getOrInsertFunction(WriteName, VoidTy, BytePtrTy, NULL);
+  }
+  return true;
+}
+
+// Instruments F's loads/stores and, if any were instrumented or F has calls,
+// adds __tsan_func_entry/__tsan_func_exit calls. Returns true iff F changed.
+bool ThreadSanitizer::runOnFunction(Function &F) {
+  if (!TD) return false;
+  SmallVector<Instruction*, 8> RetVec;
+  SmallVector<Instruction*, 8> LoadsAndStores;
+  bool Res = false;
+  bool HasCalls = false;
+
+  // Traverse all instructions, collect loads/stores/returns, check for calls.
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+    BasicBlock &BB = *FI;
+    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE; ++BI) {
+      if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
+        LoadsAndStores.push_back(BI);
+      else if (isa<ReturnInst>(BI))
+        RetVec.push_back(BI);
+      else if (isa<CallInst>(BI) || isa<InvokeInst>(BI))
+        HasCalls = true;
+    }
+  }
+
+  // We have collected all loads and stores.
+  // FIXME: many of these accesses do not need to be checked for races
+  // (e.g. variables that do not escape, etc).
+
+  // Instrument memory accesses.
+  for (size_t i = 0, n = LoadsAndStores.size(); i < n; ++i)
+    Res |= instrumentLoadOrStore(LoadsAndStores[i]);
+
+  // Instrument entry/exit if accesses were instrumented or F makes calls.
+  if (Res || HasCalls) {
+    IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+    Value *ReturnAddress = IRB.CreateCall(
+        Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
+        IRB.getInt32(0));
+    IRB.CreateCall(TsanFuncEntry, ReturnAddress);
+    for (size_t i = 0, n = RetVec.size(); i < n; ++i) {
+      IRBuilder<> IRBRet(RetVec[i]);
+      IRBRet.CreateCall(TsanFuncExit);
+    }
+    Res = true;  // F was modified even if no access was instrumented.
+  }
+  return Res;
+}
+
+// Inserts a __tsan_readN/__tsan_writeN call before the load or store I, where
+// N is the access size in bytes. Returns false if the size is unsupported.
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+  IRBuilder<> IRB(I);
+  bool IsWrite = isa<StoreInst>(*I);
+  Value *Addr = IsWrite
+      ? cast<StoreInst>(I)->getPointerOperand()
+      : cast<LoadInst>(I)->getPointerOperand();
+  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+  assert(OrigTy->isSized());
+  // Keep the full 64-bit size: narrowing could make a huge access look small.
+  uint64_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+  if (TypeSize != 8  && TypeSize != 16 &&
+      TypeSize != 32 && TypeSize != 64 && TypeSize != 128)
+    return false;  // Ignore all unusual sizes.
+  uint32_t Idx = CountTrailingZeros_32(static_cast<uint32_t>(TypeSize / 8));
+  assert(Idx < kNumberOfAccessSizes);
+  Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+  IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+  return true;
+}