tsan: speed up race deduplication
Race deduplication code proved to be a performance bottleneck in the past if suppressions/annotations are used, or just some races left unaddressed. And we still get user complaints about this:
https://groups.google.com/forum/#!topic/thread-sanitizer/hB0WyiTI4e4
ReportRace already has several layers of caching for racy pcs/addresses to make deduplication faster. However, ReportRace still takes a global mutex (ThreadRegistry and ReportMutex) during deduplication and also calls mmap/munmap (which take process-wide semaphore in kernel), this makes deduplication non-scalable.
This patch moves race deduplication outside of global mutexes and also removes all mmap/munmap calls.
As the result, race_stress.cc with 100 threads and 10000 iterations become 30x faster:
before:
real 0m21.673s
user 0m5.932s
sys 0m34.885s
after:
real 0m0.720s
user 0m23.646s
sys 0m1.254s
http://reviews.llvm.org/D12554
llvm-svn: 246758
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cc b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cc
index fd3c846..62db796 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cc
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_ann.cc
@@ -63,8 +63,8 @@
struct ExpectRace {
ExpectRace *next;
ExpectRace *prev;
- int hitcount;
- int addcount;
+ atomic_uintptr_t hitcount;
+ atomic_uintptr_t addcount;
uptr addr;
uptr size;
char *file;
@@ -90,7 +90,8 @@
ExpectRace *race = list->next;
for (; race != list; race = race->next) {
if (race->addr == addr && race->size == size) {
- race->addcount++;
+ atomic_store_relaxed(&race->addcount,
+ atomic_load_relaxed(&race->addcount) + 1);
return;
}
}
@@ -100,8 +101,8 @@
race->file = f;
race->line = l;
race->desc[0] = 0;
- race->hitcount = 0;
- race->addcount = 1;
+ atomic_store_relaxed(&race->hitcount, 0);
+ atomic_store_relaxed(&race->addcount, 1);
if (desc) {
int i = 0;
for (; i < kMaxDescLen - 1 && desc[i]; i++)
@@ -130,7 +131,7 @@
return false;
DPrintf("Hit expected/benign race: %s addr=%zx:%d %s:%d\n",
race->desc, race->addr, (int)race->size, race->file, race->line);
- race->hitcount++;
+ atomic_fetch_add(&race->hitcount, 1, memory_order_relaxed);
return true;
}
@@ -146,7 +147,7 @@
}
bool IsExpectedReport(uptr addr, uptr size) {
- Lock lock(&dyn_ann_ctx->mtx);
+ ReadLock lock(&dyn_ann_ctx->mtx);
if (CheckContains(&dyn_ann_ctx->expect, addr, size))
return true;
if (CheckContains(&dyn_ann_ctx->benign, addr, size))
@@ -155,20 +156,21 @@
}
static void CollectMatchedBenignRaces(Vector<ExpectRace> *matched,
- int *unique_count, int *hit_count, int ExpectRace::*counter) {
+ int *unique_count, int *hit_count, atomic_uintptr_t ExpectRace::*counter) {
ExpectRace *list = &dyn_ann_ctx->benign;
for (ExpectRace *race = list->next; race != list; race = race->next) {
(*unique_count)++;
- if (race->*counter == 0)
+ const uptr cnt = atomic_load_relaxed(&(race->*counter));
+ if (cnt == 0)
continue;
- (*hit_count) += race->*counter;
+ *hit_count += cnt;
uptr i = 0;
for (; i < matched->Size(); i++) {
ExpectRace *race0 = &(*matched)[i];
if (race->line == race0->line
&& internal_strcmp(race->file, race0->file) == 0
&& internal_strcmp(race->desc, race0->desc) == 0) {
- race0->*counter += race->*counter;
+ atomic_fetch_add(&(race0->*counter), cnt, memory_order_relaxed);
break;
}
}
@@ -193,8 +195,8 @@
hit_count, (int)internal_getpid());
for (uptr i = 0; i < hit_matched.Size(); i++) {
Printf("%d %s:%d %s\n",
- hit_matched[i].hitcount, hit_matched[i].file,
- hit_matched[i].line, hit_matched[i].desc);
+ atomic_load_relaxed(&hit_matched[i].hitcount),
+ hit_matched[i].file, hit_matched[i].line, hit_matched[i].desc);
}
}
if (hit_matched.Size()) {
@@ -203,8 +205,8 @@
add_count, unique_count, (int)internal_getpid());
for (uptr i = 0; i < add_matched.Size(); i++) {
Printf("%d %s:%d %s\n",
- add_matched[i].addcount, add_matched[i].file,
- add_matched[i].line, add_matched[i].desc);
+ atomic_load_relaxed(&add_matched[i].addcount),
+ add_matched[i].file, add_matched[i].line, add_matched[i].desc);
}
}
}
@@ -303,7 +305,7 @@
Lock lock(&dyn_ann_ctx->mtx);
while (dyn_ann_ctx->expect.next != &dyn_ann_ctx->expect) {
ExpectRace *race = dyn_ann_ctx->expect.next;
- if (race->hitcount == 0) {
+ if (atomic_load_relaxed(&race->hitcount) == 0) {
ctx->nmissed_expected++;
ReportMissedExpectedRace(race);
}