tsan: fix vector clocks
the new optimizations break when thread ids get reused (clocks go backwards)
add the necessary tests as well
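
illustrative sketch (not part of the patch): each clock slot now pairs the
epoch with the reuse count of the thread that last acquired through it, so a
thread inheriting a recycled tid (and thus carrying a fresh reuse count) can
no longer hit the "already acquired" fast path on stale state. the stand-in
names below (Slot, already_acquired) are invented for this example; they are
not the real ThreadClock/SyncClock API.

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Simplified stand-in for one vector-clock slot: timestamp plus the
  // reuse count of the thread that last marked the slot as acquired.
  struct Slot {
    uint64_t epoch;
    uint64_t reused;
  };

  // Fast-path check: the stored reuse count must match the acquiring
  // thread's current reuse count. A new thread on a recycled tid starts
  // with an incremented reuse count, so leftovers from the previous owner
  // never satisfy this check and the full acquire runs instead.
  static bool already_acquired(const std::vector<Slot> &sync, unsigned tid,
                               uint64_t my_reused) {
    return tid < sync.size() && sync[tid].reused == my_reused;
  }

  int main() {
    std::vector<Slot> sync(4);
    sync[1] = {42, 1};  // state left behind by the old owner of tid 1
    std::printf("old owner: %d\n", already_acquired(sync, 1, 1));  // 1
    std::printf("new owner: %d\n", already_acquired(sync, 1, 2));  // 0
  }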

llvm-svn: 206035
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cc b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cc
index ed5ed71..ed2c601 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.cc
@@ -17,8 +17,9 @@
 namespace __sanitizer {
 
 ThreadContextBase::ThreadContextBase(u32 tid)
-    : tid(tid), unique_id(0), os_id(0), user_id(0), status(ThreadStatusInvalid),
-      detached(false), reuse_count(0), parent_tid(0), next(0) {
+    : tid(tid), unique_id(0), reuse_count(), os_id(0), user_id(0),
+      status(ThreadStatusInvalid),
+      detached(false), parent_tid(0), next(0) {
   name[0] = '\0';
 }
 
@@ -78,7 +79,6 @@
 
 void ThreadContextBase::Reset() {
   status = ThreadStatusInvalid;
-  reuse_count++;
   SetName(0);
   OnReset();
 }
@@ -88,10 +88,11 @@
 const u32 ThreadRegistry::kUnknownTid = ~0U;
 
 ThreadRegistry::ThreadRegistry(ThreadContextFactory factory, u32 max_threads,
-                               u32 thread_quarantine_size)
+                               u32 thread_quarantine_size, u32 max_reuse)
     : context_factory_(factory),
       max_threads_(max_threads),
       thread_quarantine_size_(thread_quarantine_size),
+      max_reuse_(max_reuse),
       mtx_(),
       n_contexts_(0),
       total_threads_(0),
@@ -282,6 +283,9 @@
   dead_threads_.pop_front();
   CHECK_EQ(tctx->status, ThreadStatusDead);
   tctx->Reset();
+  tctx->reuse_count++;
+  if (max_reuse_ > 0 && tctx->reuse_count >= max_reuse_)
+    return;
   invalid_threads_.push_back(tctx);
 }
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
index a59bba5..8bb7ff3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
@@ -38,13 +38,13 @@
 
   const u32 tid;  // Thread ID. Main thread should have tid = 0.
   u64 unique_id;  // Unique thread ID.
+  u32 reuse_count;  // Number of times this tid was reused.
   uptr os_id;     // PID (used for reporting).
   uptr user_id;   // Some opaque user thread id (e.g. pthread_t).
   char name[64];  // As annotated by user.
 
   ThreadStatus status;
   bool detached;
-  int reuse_count;
 
   u32 parent_tid;
   ThreadContextBase *next;  // For storing thread contexts in a list.
@@ -77,7 +77,7 @@
   static const u32 kUnknownTid;
 
   ThreadRegistry(ThreadContextFactory factory, u32 max_threads,
-                 u32 thread_quarantine_size);
+                 u32 thread_quarantine_size, u32 max_reuse = 0);
   void GetNumberOfThreads(uptr *total = 0, uptr *running = 0, uptr *alive = 0);
   uptr GetMaxAliveThreads();
 
@@ -119,6 +119,7 @@
   const ThreadContextFactory context_factory_;
   const u32 max_threads_;
   const u32 thread_quarantine_size_;
+  const u32 max_reuse_;
 
   BlockingMutex mtx_;
 
diff --git a/compiler-rt/lib/tsan/Makefile.old b/compiler-rt/lib/tsan/Makefile.old
index a93fa5f..b982e66 100644
--- a/compiler-rt/lib/tsan/Makefile.old
+++ b/compiler-rt/lib/tsan/Makefile.old
@@ -1,5 +1,5 @@
 DEBUG=0
-LDFLAGS=-ldl -lpthread -pie
+LDFLAGS=-ldl -lrt -lpthread -pie
 CXXFLAGS = -std=c++11 -fPIE -fno-rtti -g -Wall -Werror \
 					 -DGTEST_HAS_RTTI=0 -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
 CLANG=clang
diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.cc b/compiler-rt/lib/tsan/rtl/tsan_clock.cc
index d234585..d40f40f 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_clock.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_clock.cc
@@ -69,13 +69,15 @@
 //
 // Description of SyncClock state:
 // clk_ - variable size vector clock, low kClkBits hold timestamp,
-//   the remaining bits hold "last_acq" counter;
-//   if last_acq == release_seq_, then the respective thread has already
+//   the remaining bits hold the "acquired" flag (the actual value is the
+//   thread's reuse counter);
+//   if acquired == thr->reused_, then the respective thread has already
 //   acquired this clock (except possibly dirty_tids_).
 // dirty_tids_ - holds up to two indeces in the vector clock that other threads
-//   need to acquire regardless of last_acq value;
+//   need to acquire regardless of the "acquired" flag value;
 // release_store_tid_ - denotes that the clock state is a result of
 //   release-store operation by the thread with release_store_tid_ index.
+// release_store_reused_ - reuse count of release_store_tid_.
 
 // We don't have ThreadState in these methods, so this is an ugly hack that
 // works only in C++.
@@ -89,11 +91,15 @@
 
 const unsigned kInvalidTid = (unsigned)-1;
 
-ThreadClock::ThreadClock(unsigned tid)
-    : tid_(tid) {
-  DCHECK_LT(tid, kMaxTidInClock);
+ThreadClock::ThreadClock(unsigned tid, unsigned reused)
+    : tid_(tid)
+    , reused_(reused + 1) {  // 0 has a special meaning ("not acquired")
+  CHECK_LT(tid, kMaxTidInClock);
+  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
   nclk_ = tid_ + 1;
+  last_acquire_ = 0;
   internal_memset(clk_, 0, sizeof(clk_));
+  clk_[tid_].reused = reused_;
 }
 
 void ThreadClock::acquire(const SyncClock *src) {
@@ -108,37 +114,25 @@
     return;
   }
 
-  // If the clock is a result of release-store operation, and the current thread
-  // has already acquired from that thread after or at that time,
-  // don't need to do anything (src can't contain anything new for the
-  // current thread).
-  unsigned tid1 = src->release_store_tid_;
-  if (tid1 != kInvalidTid && (src->clk_[tid1] & kClkMask) <= clk_[tid1]) {
-    CPP_STAT_INC(StatClockAcquireFastRelease);
-    return;
-  }
-
   // Check if we've already acquired src after the last release operation on src
   bool acquired = false;
   if (nclk > tid_) {
     CPP_STAT_INC(StatClockAcquireLarge);
-    u64 myepoch = src->clk_[tid_];
-    u64 last_acq = myepoch >> kClkBits;
-    if (last_acq == src->release_seq_) {
+    if (src->clk_[tid_].reused == reused_) {
       CPP_STAT_INC(StatClockAcquireRepeat);
       for (unsigned i = 0; i < kDirtyTids; i++) {
         unsigned tid = src->dirty_tids_[i];
         if (tid != kInvalidTid) {
-          u64 epoch = src->clk_[tid] & kClkMask;
-          if (clk_[tid] < epoch) {
-            clk_[tid] = epoch;
+          u64 epoch = src->clk_[tid].epoch;
+          if (clk_[tid].epoch < epoch) {
+            clk_[tid].epoch = epoch;
             acquired = true;
           }
         }
       }
       if (acquired) {
         CPP_STAT_INC(StatClockAcquiredSomething);
-        last_acquire_ = clk_[tid_];
+        last_acquire_ = clk_[tid_].epoch;
       }
       return;
     }
@@ -148,28 +142,26 @@
   CPP_STAT_INC(StatClockAcquireFull);
   nclk_ = max(nclk_, nclk);
   for (uptr i = 0; i < nclk; i++) {
-    u64 epoch = src->clk_[i] & kClkMask;
-    if (clk_[i] < epoch) {
-      clk_[i] = epoch;
+    u64 epoch = src->clk_[i].epoch;
+    if (clk_[i].epoch < epoch) {
+      clk_[i].epoch = epoch;
       acquired = true;
     }
   }
 
   // Remember that this thread has acquired this clock.
-  if (nclk > tid_) {
-    u64 myepoch = src->clk_[tid_];
-    src->clk_[tid_] = (myepoch & kClkMask) | (src->release_seq_ << kClkBits);
-  }
+  if (nclk > tid_)
+    src->clk_[tid_].reused = reused_;
 
   if (acquired) {
     CPP_STAT_INC(StatClockAcquiredSomething);
-    last_acquire_ = clk_[tid_];
+    last_acquire_ = clk_[tid_].epoch;
   }
 }
 
 void ThreadClock::release(SyncClock *dst) const {
-  DCHECK(nclk_ <= kMaxTid);
-  DCHECK(dst->clk_.Size() <= kMaxTid);
+  DCHECK_LE(nclk_, kMaxTid);
+  DCHECK_LE(dst->clk_.Size(), kMaxTid);
 
   if (dst->clk_.Size() == 0) {
     // ReleaseStore will correctly set release_store_tid_,
@@ -188,9 +180,10 @@
   // Check if we had not acquired anything from other threads
   // since the last release on dst. If so, we need to update
   // only dst->clk_[tid_].
-  if ((dst->clk_[tid_] & kClkMask) > last_acquire_) {
+  if (dst->clk_[tid_].epoch > last_acquire_) {
     UpdateCurrentThread(dst);
-    if (dst->release_store_tid_ != tid_)
+    if (dst->release_store_tid_ != tid_ ||
+        dst->release_store_reused_ != reused_)
       dst->release_store_tid_ = kInvalidTid;
     return;
   }
@@ -202,22 +195,23 @@
   if (acquired)
     CPP_STAT_INC(StatClockReleaseAcquired);
   // Update dst->clk_.
-  for (uptr i = 0; i < nclk_; i++)
-    dst->clk_[i] = max(dst->clk_[i] & kClkMask, clk_[i]);
-  // Clear last_acq in the remaining elements.
+  for (uptr i = 0; i < nclk_; i++) {
+    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
+    dst->clk_[i].reused = 0;
+  }
+  // Clear the 'acquired' flag in the remaining elements.
   if (nclk_ < dst->clk_.Size())
     CPP_STAT_INC(StatClockReleaseClearTail);
   for (uptr i = nclk_; i < dst->clk_.Size(); i++)
-    dst->clk_[i] = dst->clk_[i] & kClkMask;
-  // Since we've cleared all last_acq, we can reset release_seq_ as well.
-  dst->release_seq_ = 1;
+    dst->clk_[i].reused = 0;
   for (unsigned i = 0; i < kDirtyTids; i++)
     dst->dirty_tids_[i] = kInvalidTid;
   dst->release_store_tid_ = kInvalidTid;
+  dst->release_store_reused_ = 0;
   // If we've acquired dst, remember this fact,
   // so that we don't need to acquire it on next acquire.
   if (acquired)
-    dst->clk_[tid_] = dst->clk_[tid_] | (1ULL << kClkBits);
+    dst->clk_[tid_].reused = reused_;
 }
 
 void ThreadClock::ReleaseStore(SyncClock *dst) const {
@@ -232,7 +226,8 @@
   }
 
   if (dst->release_store_tid_ == tid_ &&
-      (dst->clk_[tid_] & kClkMask) > last_acquire_) {
+      dst->release_store_reused_ == reused_ &&
+      dst->clk_[tid_].epoch > last_acquire_) {
     CPP_STAT_INC(StatClockStoreFast);
     UpdateCurrentThread(dst);
     return;
@@ -240,21 +235,22 @@
 
   // O(N) release-store.
   CPP_STAT_INC(StatClockStoreFull);
-  for (uptr i = 0; i < nclk_; i++)
-    dst->clk_[i] = clk_[i];
+  for (uptr i = 0; i < nclk_; i++) {
+    dst->clk_[i].epoch = clk_[i].epoch;
+    dst->clk_[i].reused = 0;
+  }
   // Clear the tail of dst->clk_.
   if (nclk_ < dst->clk_.Size()) {
     internal_memset(&dst->clk_[nclk_], 0,
         (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
     CPP_STAT_INC(StatClockStoreTail);
   }
-  // Since we've cleared all last_acq, we can reset release_seq_ as well.
-  dst->release_seq_ = 1;
   for (unsigned i = 0; i < kDirtyTids; i++)
     dst->dirty_tids_[i] = kInvalidTid;
   dst->release_store_tid_ = tid_;
+  dst->release_store_reused_ = reused_;
   // Rememeber that we don't need to acquire it in future.
-  dst->clk_[tid_] = clk_[tid_] | (1ULL << kClkBits);
+  dst->clk_[tid_].reused = reused_;
 }
 
 void ThreadClock::acq_rel(SyncClock *dst) {
@@ -265,8 +261,8 @@
 
 // Updates only single element related to the current thread in dst->clk_.
 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
-  // Update the threads time, but preserve last_acq.
-  dst->clk_[tid_] = clk_[tid_] | (dst->clk_[tid_] & ~kClkMask);
+  // Update the thread's time, but preserve the 'acquired' flag.
+  dst->clk_[tid_].epoch = clk_[tid_].epoch;
 
   for (unsigned i = 0; i < kDirtyTids; i++) {
     if (dst->dirty_tids_[i] == tid_) {
@@ -279,29 +275,23 @@
       return;
     }
   }
-  CPP_STAT_INC(StatClockReleaseFast3);
-  dst->release_seq_++;
+  // Reset all 'acquired' flags, O(N).
+  CPP_STAT_INC(StatClockReleaseSlow);
+  for (uptr i = 0; i < dst->clk_.Size(); i++) {
+    dst->clk_[i].reused = 0;
+  }
   for (unsigned i = 0; i < kDirtyTids; i++)
     dst->dirty_tids_[i] = kInvalidTid;
-  if ((dst->release_seq_ << kClkBits) == 0) {
-    CPP_STAT_INC(StatClockReleaseLastOverflow);
-    dst->release_seq_ = 1;
-    for (uptr i = 0; i < dst->clk_.Size(); i++)
-      dst->clk_[i] = dst->clk_[i] & kClkMask;
-  }
 }
 
 // Checks whether the current threads has already acquired src.
 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
-  u64 myepoch = src->clk_[tid_];
-  u64 last_acq = myepoch >> kClkBits;
-  if (last_acq != src->release_seq_)
+  if (src->clk_[tid_].reused != reused_)
     return false;
   for (unsigned i = 0; i < kDirtyTids; i++) {
     unsigned tid = src->dirty_tids_[i];
     if (tid != kInvalidTid) {
-      u64 epoch = src->clk_[tid] & kClkMask;
-      if (clk_[tid] < epoch)
+      if (clk_[tid].epoch < src->clk_[tid].epoch)
         return false;
     }
   }
@@ -312,32 +302,36 @@
 // This function is called only from weird places like AcquireGlobal.
 void ThreadClock::set(unsigned tid, u64 v) {
   DCHECK_LT(tid, kMaxTid);
-  DCHECK_GE(v, clk_[tid]);
-  clk_[tid] = v;
+  DCHECK_GE(v, clk_[tid].epoch);
+  clk_[tid].epoch = v;
   if (nclk_ <= tid)
     nclk_ = tid + 1;
-  last_acquire_ = clk_[tid_];
+  last_acquire_ = clk_[tid_].epoch;
 }
 
 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
   printf("clock=[");
   for (uptr i = 0; i < nclk_; i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
-  printf("] tid=%u last_acq=%llu", tid_, last_acquire_);
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+  printf("] reused=[");
+  for (uptr i = 0; i < nclk_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+  printf("] tid=%u/%u last_acq=%llu",
+      tid_, reused_, last_acquire_);
 }
 
 SyncClock::SyncClock()
     : clk_(MBlockClock) {
+  release_store_tid_ = kInvalidTid;
+  release_store_reused_ = 0;
   for (uptr i = 0; i < kDirtyTids; i++)
     dirty_tids_[i] = kInvalidTid;
-  release_seq_ = 0;
-  release_store_tid_ = kInvalidTid;
 }
 
 void SyncClock::Reset() {
   clk_.Reset();
-  release_seq_ = 0;
   release_store_tid_ = kInvalidTid;
+  release_store_reused_ = 0;
   for (uptr i = 0; i < kDirtyTids; i++)
     dirty_tids_[i] = kInvalidTid;
 }
@@ -345,11 +339,12 @@
 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
   printf("clock=[");
   for (uptr i = 0; i < clk_.Size(); i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i] & kClkMask);
-  printf("] last_acq=[");
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
+  printf("] reused=[");
   for (uptr i = 0; i < clk_.Size(); i++)
-    printf("%s%llu", i == 0 ? "" : ",", clk_[i] >> kClkBits);
-  printf("] release_seq=%llu release_store_tid=%d dirty_tids=%d/%d",
-      release_seq_, release_store_tid_, dirty_tids_[0], dirty_tids_[1]);
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
+  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
+      release_store_tid_, release_store_reused_,
+      dirty_tids_[0], dirty_tids_[1]);
 }
 }  // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.h b/compiler-rt/lib/tsan/rtl/tsan_clock.h
index a57a0e4..931fde8 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_clock.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_clock.h
@@ -18,7 +18,10 @@
 
 namespace __tsan {
 
-const u64 kClkMask = (1ULL << kClkBits) - 1;
+struct ClockElem {
+  u64 epoch  : kClkBits;
+  u64 reused : 64 - kClkBits;
+};
 
 // The clock that lives in sync variables (mutexes, atomics, etc).
 class SyncClock {
@@ -31,7 +34,7 @@
 
   u64 get(unsigned tid) const {
     DCHECK_LT(tid, clk_.Size());
-    return clk_[tid] & kClkMask;
+    return clk_[tid].epoch;
   }
 
   void Reset();
@@ -39,34 +42,33 @@
   void DebugDump(int(*printf)(const char *s, ...));
 
  private:
-  u64 release_seq_;
   unsigned release_store_tid_;
+  unsigned release_store_reused_;
   static const uptr kDirtyTids = 2;
   unsigned dirty_tids_[kDirtyTids];
-  mutable Vector<u64> clk_;
+  mutable Vector<ClockElem> clk_;
   friend struct ThreadClock;
 };
 
 // The clock that lives in threads.
 struct ThreadClock {
  public:
-  explicit ThreadClock(unsigned tid);
+  explicit ThreadClock(unsigned tid, unsigned reused = 0);
 
   u64 get(unsigned tid) const {
     DCHECK_LT(tid, kMaxTidInClock);
-    DCHECK_EQ(clk_[tid], clk_[tid] & kClkMask);
-    return clk_[tid];
+    return clk_[tid].epoch;
   }
 
   void set(unsigned tid, u64 v);
 
   void set(u64 v) {
-    DCHECK_GE(v, clk_[tid_]);
-    clk_[tid_] = v;
+    DCHECK_GE(v, clk_[tid_].epoch);
+    clk_[tid_].epoch = v;
   }
 
   void tick() {
-    clk_[tid_]++;
+    clk_[tid_].epoch++;
   }
 
   uptr size() const {
@@ -78,14 +80,16 @@
   void acq_rel(SyncClock *dst);
   void ReleaseStore(SyncClock *dst) const;
 
+  void DebugReset();
   void DebugDump(int(*printf)(const char *s, ...));
 
  private:
   static const uptr kDirtyTids = SyncClock::kDirtyTids;
   const unsigned tid_;
+  const unsigned reused_;
   u64 last_acquire_;
   uptr nclk_;
-  u64 clk_[kMaxTidInClock];
+  ClockElem clk_[kMaxTidInClock];
 
   bool IsAlreadyAcquired(const SyncClock *src) const;
   void UpdateCurrentThread(SyncClock *dst) const;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
index 13f7ece..5fff9e1 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -41,6 +41,7 @@
 const unsigned kMaxTid = 1 << kTidBits;
 const unsigned kMaxTidInClock = kMaxTid * 2;  // This includes msb 'freed' bit.
 const int kClkBits = 42;
+const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
 const uptr kShadowStackSize = 64 * 1024;
 const uptr kTraceStackSize = 256;
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cc b/compiler-rt/lib/tsan/rtl/tsan_rtl.cc
index f0dfb89..b0ae2dd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cc
@@ -75,7 +75,7 @@
   , nreported()
   , nmissed_expected()
   , thread_registry(new(thread_registry_placeholder) ThreadRegistry(
-      CreateThreadContext, kMaxTid, kThreadQuarantineSize))
+      CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse))
   , racy_stacks(MBlockRacyStacks)
   , racy_addresses(MBlockRacyAddresses)
   , fired_suppressions(8) {
@@ -83,6 +83,7 @@
 
 // The objects are allocated in TLS, so one may rely on zero-initialization.
 ThreadState::ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
+                         unsigned reuse_count,
                          uptr stk_addr, uptr stk_size,
                          uptr tls_addr, uptr tls_size)
   : fast_state(tid, epoch)
@@ -90,7 +91,7 @@
   // they may be accessed before the ctor.
   // , ignore_reads_and_writes()
   // , ignore_interceptors()
-  , clock(tid)
+  , clock(tid, reuse_count)
 #ifndef TSAN_GO
   , jmp_bufs(MBlockJmpBuf)
 #endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 7fc8033..f862e52 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -467,6 +467,7 @@
   int nomalloc;
 
   explicit ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
+                       unsigned reuse_count,
                        uptr stk_addr, uptr stk_size,
                        uptr tls_addr, uptr tls_size);
 };
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc
index 5b07394..7680090 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -85,8 +85,8 @@
   // from different threads.
   epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
   epoch1 = (u64)-1;
-  new(thr) ThreadState(ctx, tid, unique_id,
-      epoch0, args->stk_addr, args->stk_size, args->tls_addr, args->tls_size);
+  new(thr) ThreadState(ctx, tid, unique_id, epoch0, reuse_count,
+      args->stk_addr, args->stk_size, args->tls_addr, args->tls_size);
 #ifndef TSAN_GO
   thr->shadow_stack = &ThreadTrace(thr->tid)->shadow_stack[0];
   thr->shadow_stack_pos = thr->shadow_stack;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_stat.cc b/compiler-rt/lib/tsan/rtl/tsan_stat.cc
index 0993ab2..b42348a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_stat.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_stat.cc
@@ -85,11 +85,10 @@
   name[StatClockReleaseResize]           = "  resize                          ";
   name[StatClockReleaseFast1]            = "  fast1                           ";
   name[StatClockReleaseFast2]            = "  fast2                           ";
-  name[StatClockReleaseFast3]            = "  fast3                           ";
+  name[StatClockReleaseSlow]             = "  dirty overflow (slow)           ";
   name[StatClockReleaseFull]             = "  full (slow)                     ";
   name[StatClockReleaseAcquired]         = "  was acquired                    ";
   name[StatClockReleaseClearTail]        = "  clear tail                      ";
-  name[StatClockReleaseLastOverflow]     = "  last overflow                   ";
   name[StatClockStore]                   = "Clock release store               ";
   name[StatClockStoreResize]             = "  resize                          ";
   name[StatClockStoreFast]               = "  fast                            ";
diff --git a/compiler-rt/lib/tsan/rtl/tsan_stat.h b/compiler-rt/lib/tsan/rtl/tsan_stat.h
index 22327d3..8cdf146 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_stat.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_stat.h
@@ -82,11 +82,10 @@
   StatClockReleaseResize,
   StatClockReleaseFast1,
   StatClockReleaseFast2,
-  StatClockReleaseFast3,
+  StatClockReleaseSlow,
   StatClockReleaseFull,
   StatClockReleaseAcquired,
   StatClockReleaseClearTail,
-  StatClockReleaseLastOverflow,
   // Clocks - release store.
   StatClockStore,
   StatClockStoreResize,
diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cc b/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cc
index 127f0ef..ea36eb3 100644
--- a/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cc
+++ b/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cc
@@ -13,6 +13,7 @@
 #include "tsan_clock.h"
 #include "tsan_rtl.h"
 #include "gtest/gtest.h"
+#include <time.h>
 
 namespace __tsan {
 
@@ -63,6 +64,19 @@
   ASSERT_EQ(vector2.get(100), 1U);
 }
 
+TEST(Clock, RepeatedAcquire) {
+  ThreadClock thr1(1);
+  thr1.tick();
+  ThreadClock thr2(2);
+  thr2.tick();
+
+  SyncClock sync;
+  thr1.ReleaseStore(&sync);
+
+  thr2.acquire(&sync);
+  thr2.acquire(&sync);
+}
+
 TEST(Clock, ManyThreads) {
   SyncClock chunked;
   for (unsigned i = 0; i < 100; i++) {
@@ -130,6 +144,10 @@
   uptr size;
 
   SimpleSyncClock() {
+    Reset();
+  }
+
+  void Reset() {
     size = 0;
     for (uptr i = 0; i < kThreads; i++)
       clock[i] = 0;
@@ -211,9 +229,11 @@
   // Create kThreads thread clocks.
   SimpleThreadClock *thr0[kThreads];
   ThreadClock *thr1[kThreads];
+  unsigned reused[kThreads];
   for (unsigned i = 0; i < kThreads; i++) {
+    reused[i] = 0;
     thr0[i] = new SimpleThreadClock(i);
-    thr1[i] = new ThreadClock(i);
+    thr1[i] = new ThreadClock(i, reused[i]);
   }
 
   // Create kClocks sync clocks.
@@ -232,7 +252,7 @@
     thr0[tid]->tick();
     thr1[tid]->tick();
 
-    switch (rand() % 4) {
+    switch (rand() % 6) {
     case 0:
       if (printing)
         printf("acquire thr%d <- clk%d\n", tid, cid);
@@ -257,6 +277,24 @@
       thr0[tid]->ReleaseStore(sync0[cid]);
       thr1[tid]->ReleaseStore(sync1[cid]);
       break;
+    case 4:
+      if (printing)
+        printf("reset clk%d\n", cid);
+      sync0[cid]->Reset();
+      sync1[cid]->Reset();
+      break;
+    case 5:
+      if (printing)
+        printf("reset thr%d\n", tid);
+      u64 epoch = thr0[tid]->clock[tid] + 1;
+      reused[tid]++;
+      delete thr0[tid];
+      thr0[tid] = new SimpleThreadClock(tid);
+      thr0[tid]->clock[tid] = epoch;
+      delete thr1[tid];
+      thr1[tid] = new ThreadClock(tid, reused[tid]);
+      thr1[tid]->set(epoch);
+      break;
     }
 
     if (printing) {
@@ -297,7 +335,9 @@
 }
 
 TEST(Clock, Fuzzer) {
-  int seed = time(0);
+  timespec ts;
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+  int seed = ts.tv_sec + ts.tv_nsec;
   printf("seed=%d\n", seed);
   srand(seed);
   if (!ClockFuzzer(false)) {
diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cc b/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cc
index c1dc2fd..fc4d6c3 100644
--- a/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cc
+++ b/compiler-rt/lib/tsan/tests/unit/tsan_stack_test.cc
@@ -18,7 +18,7 @@
 namespace __tsan {
 
 static void TestStackTrace(StackTrace *trace) {
-  ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0);
+  ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0);
   uptr stack[128];
   thr.shadow_stack = &stack[0];
   thr.shadow_stack_pos = &stack[0];
@@ -62,7 +62,7 @@
   uptr buf[2];
   StackTrace trace(buf, 2);
 
-  ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0);
+  ThreadState thr(0, 0, 0, 0, 0, 0, 0, 0, 0);
   uptr stack[128];
   thr.shadow_stack = &stack[0];
   thr.shadow_stack_pos = &stack[0];