[tsan] Atomic support for ThreadSanitizer, patch by Dmitry Vyukov

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@155698 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index e650bbb..2f6c829 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Type.h"
 
@@ -62,26 +63,32 @@
 /// ThreadSanitizer: instrument the code in module to find races.
 struct ThreadSanitizer : public FunctionPass {
   ThreadSanitizer();
+  const char *getPassName() const;
   bool runOnFunction(Function &F);
   bool doInitialization(Module &M);
-  bool instrumentLoadOrStore(Instruction *I);
   static char ID;  // Pass identification, replacement for typeid.
 
  private:
+  bool instrumentLoadOrStore(Instruction *I);
+  bool instrumentAtomic(Instruction *I);
   void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
                                      SmallVectorImpl<Instruction*> &All);
   bool addrPointsToConstantData(Value *Addr);
+  int getMemoryAccessFuncIndex(Value *Addr);
 
   TargetData *TD;
   OwningPtr<FunctionBlackList> BL;
+  IntegerType *OrdTy;
   // Callbacks to run-time library are computed in doInitialization.
-  Value *TsanFuncEntry;
-  Value *TsanFuncExit;
+  Function *TsanFuncEntry;
+  Function *TsanFuncExit;
   // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
   static const size_t kNumberOfAccessSizes = 5;
-  Value *TsanRead[kNumberOfAccessSizes];
-  Value *TsanWrite[kNumberOfAccessSizes];
-  Value *TsanVptrUpdate;
+  Function *TsanRead[kNumberOfAccessSizes];
+  Function *TsanWrite[kNumberOfAccessSizes];
+  Function *TsanAtomicLoad[kNumberOfAccessSizes];
+  Function *TsanAtomicStore[kNumberOfAccessSizes];
+  Function *TsanVptrUpdate;
 };
 }  // namespace
 
@@ -90,6 +97,10 @@
     "ThreadSanitizer: detects data races.",
     false, false)
 
+const char *ThreadSanitizer::getPassName() const {
+  return "ThreadSanitizer";
+}
+
 ThreadSanitizer::ThreadSanitizer()
   : FunctionPass(ID),
   TD(NULL) {
@@ -99,6 +110,13 @@
   return new ThreadSanitizer();
 }
 
+/// Returns FuncOrBitcast as a Function; otherwise dumps it and reports a fatal error.
+static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
+  if (Function *Fn = dyn_cast<Function>(FuncOrBitcast)) return Fn;
+  FuncOrBitcast->dump();
+  report_fatal_error("ThreadSanitizer interface function redefined");
+}
+
 bool ThreadSanitizer::doInitialization(Module &M) {
   TD = getAnalysisIfAvailable<TargetData>();
   if (!TD)
@@ -112,23 +130,38 @@
   appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
 
   // Initialize the callbacks.
-  TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(),
-                                        IRB.getInt8PtrTy(), NULL);
-  TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(),
-                                       NULL);
+  TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
+      "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+  TsanFuncExit = checkInterfaceFunction(M.getOrInsertFunction(
+      "__tsan_func_exit", IRB.getVoidTy(), NULL));
+  OrdTy = IRB.getInt32Ty();
   for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
-    SmallString<32> ReadName("__tsan_read");
-    ReadName += itostr(1 << i);
-    TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(),
-                                        IRB.getInt8PtrTy(), NULL);
-    SmallString<32> WriteName("__tsan_write");
-    WriteName += itostr(1 << i);
-    TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(),
-                                         IRB.getInt8PtrTy(), NULL);
+    const size_t ByteSize = 1 << i;
+    const size_t BitSize = ByteSize * 8;
+    SmallString<32> ReadName("__tsan_read" + itostr(ByteSize));
+    TsanRead[i] = checkInterfaceFunction(M.getOrInsertFunction(
+        ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+
+    SmallString<32> WriteName("__tsan_write" + itostr(ByteSize));
+    TsanWrite[i] = checkInterfaceFunction(M.getOrInsertFunction(
+        WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
+
+    Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) +
+                                   "_load");
+    TsanAtomicLoad[i] = checkInterfaceFunction(M.getOrInsertFunction(
+        AtomicLoadName, Ty, PtrTy, OrdTy, NULL));
+
+    SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) +
+                                    "_store");
+    TsanAtomicStore[i] = checkInterfaceFunction(M.getOrInsertFunction(
+        AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy,
+        NULL));
   }
-  TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
-                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
-                                         NULL);
+  TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction(
+      "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), NULL));
   return true;
 }
 
@@ -202,12 +235,27 @@
   Local.clear();
 }
 
+/// Returns true if I is an atomicrmw or cmpxchg, or an atomic load, atomic
+/// store or fence whose synchronization scope is CrossThread.
+static bool isAtomic(Instruction *I) {
+  if (isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I))
+    return true;
+  if (LoadInst *Load = dyn_cast<LoadInst>(I))
+    return Load->isAtomic() && Load->getSynchScope() == CrossThread;
+  if (StoreInst *Store = dyn_cast<StoreInst>(I))
+    return Store->isAtomic() && Store->getSynchScope() == CrossThread;
+  if (FenceInst *Fence = dyn_cast<FenceInst>(I))
+    return Fence->getSynchScope() == CrossThread;
+  return false;
+}
+
 bool ThreadSanitizer::runOnFunction(Function &F) {
   if (!TD) return false;
   if (BL->isIn(F)) return false;
   SmallVector<Instruction*, 8> RetVec;
   SmallVector<Instruction*, 8> AllLoadsAndStores;
   SmallVector<Instruction*, 8> LocalLoadsAndStores;
+  SmallVector<Instruction*, 8> AtomicAccesses;
   bool Res = false;
   bool HasCalls = false;
 
@@ -217,7 +265,9 @@
     BasicBlock &BB = *FI;
     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
          BI != BE; ++BI) {
-      if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
+      if (isAtomic(BI))
+        AtomicAccesses.push_back(BI);
+      else if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
         LocalLoadsAndStores.push_back(BI);
       else if (isa<ReturnInst>(BI))
         RetVec.push_back(BI);
@@ -238,6 +288,11 @@
     Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
   }
 
+  // Instrument atomic memory accesses.
+  for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) {
+    Res |= instrumentAtomic(AtomicAccesses[i]);
+  }
+
   // Instrument function entry/exit points if there were instrumented accesses.
   if (Res || HasCalls) {
     IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
@@ -260,16 +315,9 @@
   Value *Addr = IsWrite
       ? cast<StoreInst>(I)->getPointerOperand()
       : cast<LoadInst>(I)->getPointerOperand();
-  Type *OrigPtrTy = Addr->getType();
-  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
-  assert(OrigTy->isSized());
-  uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
-  if (TypeSize != 8  && TypeSize != 16 &&
-      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
-    NumAccessesWithBadSize++;
-    // Ignore all unusual sizes.
+  int Idx = getMemoryAccessFuncIndex(Addr);
+  if (Idx < 0)
     return false;
-  }
   if (IsWrite && isVtableAccess(I)) {
     Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
     IRB.CreateCall2(TsanVptrUpdate,
@@ -278,11 +326,82 @@
     NumInstrumentedVtableWrites++;
     return true;
   }
-  size_t Idx = CountTrailingZeros_32(TypeSize / 8);
-  assert(Idx < kNumberOfAccessSizes);
   Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
   IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
   if (IsWrite) NumInstrumentedWrites++;
   else         NumInstrumentedReads++;
   return true;
 }
+
+/// Encodes ord as the i32 bit-flag constant passed to the atomic callbacks.
+static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
+  switch (ord) {
+    case NotAtomic:              break;  // Invalid here; rejected below.
+    case Unordered:              // Fall-through: encoded like Monotonic.
+    case Monotonic:              return IRB->getInt32(1 << 0);
+ // case Consume:                return IRB->getInt32(1 << 1);  // Unspecified.
+    case Acquire:                return IRB->getInt32(1 << 2);
+    case Release:                return IRB->getInt32(1 << 3);
+    case AcquireRelease:         return IRB->getInt32(1 << 4);
+    case SequentiallyConsistent: return IRB->getInt32(1 << 5);
+  }
+  llvm_unreachable("createOrdering called with a non-atomic ordering");
+}
+
+/// Replaces an atomic load or store with a call to the matching
+/// TsanAtomicLoad/TsanAtomicStore callback. Returns true only if I was
+/// replaced; unusual access sizes and unsupported instructions return false.
+bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
+  IRBuilder<> IRB(I);
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    Value *Addr = LI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr);
+    if (Idx < 0)
+      return false;
+    const size_t ByteSize = 1 << Idx;
+    const size_t BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     createOrdering(&IRB, LI->getOrdering())};
+    CallInst *C = CallInst::Create(TsanAtomicLoad[Idx],
+                                   ArrayRef<Value*>(Args));
+    ReplaceInstWithInst(I, C);
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    Value *Addr = SI->getPointerOperand();
+    int Idx = getMemoryAccessFuncIndex(Addr);
+    if (Idx < 0)
+      return false;
+    const size_t ByteSize = 1 << Idx;
+    const size_t BitSize = ByteSize * 8;
+    Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
+    Type *PtrTy = Ty->getPointerTo();
+    Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
+                     IRB.CreateIntCast(SI->getValueOperand(), Ty, false),
+                     createOrdering(&IRB, SI->getOrdering())};
+    CallInst *C = CallInst::Create(TsanAtomicStore[Idx],
+                                   ArrayRef<Value*>(Args));
+    ReplaceInstWithInst(I, C);
+  } else {
+    // FIXME: AtomicRMWInst, AtomicCmpXchgInst and FenceInst are not yet
+    // supported; they are left untouched, so report that nothing changed.
+    return false;
+  }
+  return true;
+}
+
+/// Returns log2 of the pointee store size in bytes; -1 unless 1, 2, 4, 8 or 16.
+int ThreadSanitizer::getMemoryAccessFuncIndex(Value *Addr) {
+  Type *ElemTy = cast<PointerType>(Addr->getType())->getElementType();
+  assert(ElemTy->isSized());
+  const uint32_t SizeInBits = TD->getTypeStoreSizeInBits(ElemTy);
+  const bool IsUsualSize = SizeInBits == 8 || SizeInBits == 16 ||
+      SizeInBits == 32 || SizeInBits == 64 || SizeInBits == 128;
+  if (!IsUsualSize) {
+    NumAccessesWithBadSize++;  // Ignore all unusual sizes.
+    return -1;
+  }
+  const size_t Idx = CountTrailingZeros_32(SizeInBits / 8);
+  assert(Idx < kNumberOfAccessSizes);
+  return Idx;
+}
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
new file mode 100644
index 0000000..02bf215
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -0,0 +1,323 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that atomic memory operations are converted to calls into ThreadSanitizer runtime.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define i8 @atomic8_load_unordered(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a unordered, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_unordered
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 1)
+
+define i8 @atomic8_load_monotonic(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a monotonic, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_monotonic
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 1)
+
+define i8 @atomic8_load_acquire(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a acquire, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_acquire
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 4)
+
+define i8 @atomic8_load_seq_cst(i8* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i8* %a seq_cst, align 1
+  ret i8 %0
+}
+; CHECK: atomic8_load_seq_cst
+; CHECK: call i8 @__tsan_atomic8_load(i8* %a, i32 32)
+
+define void @atomic8_store_unordered(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a unordered, align 1
+  ret void
+}
+; CHECK: atomic8_store_unordered
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 1)
+
+define void @atomic8_store_monotonic(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a monotonic, align 1
+  ret void
+}
+; CHECK: atomic8_store_monotonic
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 1)
+
+define void @atomic8_store_release(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a release, align 1
+  ret void
+}
+; CHECK: atomic8_store_release
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 8)
+
+define void @atomic8_store_seq_cst(i8* %a) nounwind uwtable {
+entry:
+  store atomic i8 0, i8* %a seq_cst, align 1
+  ret void
+}
+; CHECK: atomic8_store_seq_cst
+; CHECK: call void @__tsan_atomic8_store(i8* %a, i8 0, i32 32)
+
+define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a unordered, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_unordered
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 1)
+
+define i16 @atomic16_load_monotonic(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a monotonic, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_monotonic
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 1)
+
+define i16 @atomic16_load_acquire(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a acquire, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_acquire
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 4)
+
+define i16 @atomic16_load_seq_cst(i16* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i16* %a seq_cst, align 2
+  ret i16 %0
+}
+; CHECK: atomic16_load_seq_cst
+; CHECK: call i16 @__tsan_atomic16_load(i16* %a, i32 32)
+
+define void @atomic16_store_unordered(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a unordered, align 2
+  ret void
+}
+; CHECK: atomic16_store_unordered
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 1)
+
+define void @atomic16_store_monotonic(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a monotonic, align 2
+  ret void
+}
+; CHECK: atomic16_store_monotonic
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 1)
+
+define void @atomic16_store_release(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a release, align 2
+  ret void
+}
+; CHECK: atomic16_store_release
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 8)
+
+define void @atomic16_store_seq_cst(i16* %a) nounwind uwtable {
+entry:
+  store atomic i16 0, i16* %a seq_cst, align 2
+  ret void
+}
+; CHECK: atomic16_store_seq_cst
+; CHECK: call void @__tsan_atomic16_store(i16* %a, i16 0, i32 32)
+
+define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a unordered, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_unordered
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 1)
+
+define i32 @atomic32_load_monotonic(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a monotonic, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_monotonic
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 1)
+
+define i32 @atomic32_load_acquire(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a acquire, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_acquire
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 4)
+
+define i32 @atomic32_load_seq_cst(i32* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i32* %a seq_cst, align 4
+  ret i32 %0
+}
+; CHECK: atomic32_load_seq_cst
+; CHECK: call i32 @__tsan_atomic32_load(i32* %a, i32 32)
+
+define void @atomic32_store_unordered(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a unordered, align 4
+  ret void
+}
+; CHECK: atomic32_store_unordered
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 1)
+
+define void @atomic32_store_monotonic(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a monotonic, align 4
+  ret void
+}
+; CHECK: atomic32_store_monotonic
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 1)
+
+define void @atomic32_store_release(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a release, align 4
+  ret void
+}
+; CHECK: atomic32_store_release
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 8)
+
+define void @atomic32_store_seq_cst(i32* %a) nounwind uwtable {
+entry:
+  store atomic i32 0, i32* %a seq_cst, align 4
+  ret void
+}
+; CHECK: atomic32_store_seq_cst
+; CHECK: call void @__tsan_atomic32_store(i32* %a, i32 0, i32 32)
+
+define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a unordered, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_unordered
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 1)
+
+define i64 @atomic64_load_monotonic(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a monotonic, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_monotonic
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 1)
+
+define i64 @atomic64_load_acquire(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a acquire, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_acquire
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 4)
+
+define i64 @atomic64_load_seq_cst(i64* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i64* %a seq_cst, align 8
+  ret i64 %0
+}
+; CHECK: atomic64_load_seq_cst
+; CHECK: call i64 @__tsan_atomic64_load(i64* %a, i32 32)
+
+define void @atomic64_store_unordered(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a unordered, align 8
+  ret void
+}
+; CHECK: atomic64_store_unordered
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 1)
+
+define void @atomic64_store_monotonic(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a monotonic, align 8
+  ret void
+}
+; CHECK: atomic64_store_monotonic
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 1)
+
+define void @atomic64_store_release(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a release, align 8
+  ret void
+}
+; CHECK: atomic64_store_release
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 8)
+
+define void @atomic64_store_seq_cst(i64* %a) nounwind uwtable {
+entry:
+  store atomic i64 0, i64* %a seq_cst, align 8
+  ret void
+}
+; CHECK: atomic64_store_seq_cst
+; CHECK: call void @__tsan_atomic64_store(i64* %a, i64 0, i32 32)
+
+define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a unordered, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_unordered
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 1)
+
+define i128 @atomic128_load_monotonic(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a monotonic, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_monotonic
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 1)
+
+define i128 @atomic128_load_acquire(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a acquire, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_acquire
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 4)
+
+define i128 @atomic128_load_seq_cst(i128* %a) nounwind uwtable {
+entry:
+  %0 = load atomic i128* %a seq_cst, align 16
+  ret i128 %0
+}
+; CHECK: atomic128_load_seq_cst
+; CHECK: call i128 @__tsan_atomic128_load(i128* %a, i32 32)
+
+define void @atomic128_store_unordered(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a unordered, align 16
+  ret void
+}
+; CHECK: atomic128_store_unordered
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 1)
+
+define void @atomic128_store_monotonic(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a monotonic, align 16
+  ret void
+}
+; CHECK: atomic128_store_monotonic
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 1)
+
+define void @atomic128_store_release(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a release, align 16
+  ret void
+}
+; CHECK: atomic128_store_release
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 8)
+
+define void @atomic128_store_seq_cst(i128* %a) nounwind uwtable {
+entry:
+  store atomic i128 0, i128* %a seq_cst, align 16
+  ret void
+}
+; CHECK: atomic128_store_seq_cst
+; CHECK: call void @__tsan_atomic128_store(i128* %a, i128 0, i32 32)