//===-- tsan_clock.cc -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock, sized for the maximum
// number of threads. SyncClock contains a growable vector clock, sized for
// the number of threads currently involved.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the actual implementation is significantly more complex. The added
// complexity makes it possible to handle important classes of use cases in
// O(1) instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is
// in a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by
// switching to O(N). A sketch of the fast path for use case 1 follows.
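//
// For example, the fast acquire path for use case 1 looks roughly like this
// (an illustrative sketch in terms of the model above; acquire_full and
// maybe_pull_epoch are simplified placeholder names, the real logic is in
// ThreadClock::acquire below):
//
//   void ThreadClock::acquire_sketch(const SyncClock *src) {
//     if (src->elem(tid_).reused == reused_) {
//       // We have already acquired src since its last full release;
//       // at most kDirtyTids elements may have changed since then.
//       for (unsigned i = 0; i < kDirtyTids; i++)
//         maybe_pull_epoch(src, src->dirty_tids_[i]);  // O(1)
//       return;
//     }
//     acquire_full(src);  // the O(N) element-wise max shown above
//   }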
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of ThreadClock state:
// clk_ - fixed size vector clock.
// nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
// last_acquire_ - current thread time when it acquired something from
//   other threads.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
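//
// For illustration, each SyncClock element can be thought of as a 64-bit
// value split into two bitfields (a sketch; the authoritative definition of
// ClockElem lives in tsan_clock.h):
//
//   struct ClockElem {
//     u64 epoch  : kClkBits;       // the thread's timestamp
//     u64 reused : 64 - kClkBits;  // "acquired" mark (thread reuse count)
//   };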

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in the C++ runtime (cur_thread() is unavailable in Go mode).
#ifndef SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

const unsigned kInvalidTid = (unsigned)-1;

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}

void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation on src
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->elem(tid_).reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->elem(tid).epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->elem(i).epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (nclk > tid_)
    src->elem(tid_).reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}
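
// Example of the fast acquire path above (an illustrative scenario):
// thread A releases to sync object S; the release fast path updates only
// S.elem(A) and records A in S.dirty_tids_. When thread B, which had fully
// acquired S earlier (so S.elem(B).reused == B.reused_), acquires S again,
// it only pulls in the epochs listed in S.dirty_tids_, i.e. at most
// kDirtyTids elements, instead of scanning all of S.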

void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_ and clear the 'acquired' marks: other threads'
  // earlier acquisitions of dst become stale after this release.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = max(ce.epoch, clk_[i].epoch);
    ce.reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &ce = dst->elem(i);
    ce.epoch = clk_[i].epoch;
    ce.reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &ce = dst->elem(i);
      ce.epoch = 0;
      ce.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;
}

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
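  // The model above defines acq_rel as acquire followed by release. After
  // acquire(c, dst) this thread's clock dominates dst element-wise, so
  // ReleaseStore (a plain copy) yields the same result as release (an
  // element-wise max) and is cheaper.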
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only a single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  dst->elem(tid_).epoch = clk_[tid_].epoch;

  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}
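
// Note on the slow path above: once the dirty list is full with other
// threads' ids, it can no longer describe everything that has changed in
// dst, so all 'acquired' marks are reset and the dirty list is cleared;
// each thread's next acquire of dst then takes the O(N) path once before
// becoming fast again.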

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    unsigned tid = src->dirty_tids_[i];
    if (tid != kInvalidTid) {
      if (clk_[tid].epoch < src->elem(tid).epoch)
        return false;
    }
  }
  return true;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
      RoundUpTo(size_, ClockBlock::kClockCount)) {
    // Growing within the same block.
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (nclk <= ClockBlock::kClockCount) {
    // Grow from 0 to one-level table.
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }
  // Growing two-level table.
  if (size_ == 0) {
    // Allocate first level table.
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
  } else if (size_ <= ClockBlock::kClockCount) {
    // Transform one-level table to two-level table.
    u32 old = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    tab_->table[0] = old;
  }
  // At this point we have the first level table allocated.
  // Add second level tables as necessary.
  for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
      i < nclk; i += ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
    tab_->table[i/ClockBlock::kClockCount] = idx;
  }
  size_ = nclk;
}
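
// For concreteness, a sketch of the resulting layout (assuming, per
// tsan_clock.h, 512-byte ClockBlocks, i.e. kClockCount == 64 eight-byte
// elements or kTableSize == 128 u32 indices per block):
//   size_ <= 64: tab_ is a single block of ClockElems (one level);
//   size_ >  64: tab_->table[] holds indices of second-level blocks,
//                with block i covering elements [64*i, 64*(i+1)).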

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid].epoch);
  clk_[tid].epoch = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_].epoch;
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
  printf("] reused=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
  printf("] tid=%u/%u last_acq=%llu",
      tid_, reused_, last_acquire_);
}

SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid)
    , release_store_reused_()
    , tab_()
    , tab_idx_()
    , size_() {
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_ == 0) {
    // nothing
  } else if (size_ <= ClockBlock::kClockCount) {
    // One-level table.
    ctx->clock_alloc.Free(c, tab_idx_);
  } else {
    // Two-level table.
    for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
      ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
    ctx->clock_alloc.Free(c, tab_idx_);
  }
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_tids_[i] = kInvalidTid;
}

ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ <= ClockBlock::kClockCount)
    return tab_->clock[tid];
  u32 idx = tab_->table[tid / ClockBlock::kClockCount];
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid % ClockBlock::kClockCount];
}
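
// Example of the two-level lookup above (illustrative, assuming
// kClockCount == 64): for tid == 200 the element lives in the second-level
// block tab_->table[200 / 64] == tab_->table[3], at offset 200 % 64 == 8
// within that block.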

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
      release_store_tid_, release_store_reused_,
      dirty_tids_[0], dirty_tids_[1]);
}
}  // namespace __tsan