Update aosp/master compiler-rt for rebase to r256229

http://b/26987366

Change-Id: I0ca3d7d3f1b7926fcffcb5b467e79958de576437
diff --git a/lib/tsan/rtl/Makefile.old b/lib/tsan/rtl/Makefile.old
deleted file mode 100644
index ee7095e..0000000
--- a/lib/tsan/rtl/Makefile.old
+++ /dev/null
@@ -1,63 +0,0 @@
-CXXFLAGS = -std=c++11 -fPIE -g -Wall -Werror -fno-builtin -msse3 -DSANITIZER_DEBUG=$(DEBUG) -DTSAN_CONTAINS_UBSAN=0
-CLANG=clang
-ifeq ($(DEBUG), 0)
-  CXXFLAGS += -O3
-endif
-
-# For interception. FIXME: move interception one level higher.
-INTERCEPTION=../../interception
-COMMON=../../sanitizer_common
-INCLUDES= -I../.. -I../../../include
-EXTRA_CXXFLAGS=-fno-exceptions -fno-rtti
-NO_SYSROOT=--sysroot=.
-CXXFLAGS+=$(EXTRA_CXXFLAGS)
-CXXFLAGS+=$(CFLAGS)
-ifeq ($(DEBUG), 0)
-  CXXFLAGS+=-fomit-frame-pointer
-ifeq ($(CXX), g++)
-  CXXFLAGS+=-Wno-maybe-uninitialized
-  CXXFLAGS+=-Wframe-larger-than=512
-endif  # CXX=g++
-endif  # DEBUG=0
-
-ifeq ($(CXX), $(CLANG)++)
-  # Global constructors are banned.
-  CXXFLAGS+=-Wglobal-constructors
-endif
-
-
-
-all: libtsan.a
-
-LIBTSAN_HEADERS=$(wildcard *.h) \
-		$(wildcard $(INTERCEPTION)/*.h) \
-		$(wildcard $(COMMON)/*.h)
-LIBTSAN_SRC=$(wildcard *.cc)
-LIBTSAN_ASM_SRC=$(wildcard *.S)
-INTERCEPTION_SRC=$(wildcard $(INTERCEPTION)/*.cc)
-COMMON_SRC=$(wildcard $(COMMON)/*.cc)
-
-LIBTSAN_OBJ=$(patsubst %.cc,%.o,$(LIBTSAN_SRC)) \
-	    $(patsubst %.S,%.o,$(LIBTSAN_ASM_SRC)) \
-	    $(patsubst $(INTERCEPTION)/%.cc,%.o,$(INTERCEPTION_SRC)) \
-	    $(patsubst $(COMMON)/%.cc,%.o,$(COMMON_SRC))
-
-%_linux.o: %_linux.cc Makefile.old $(LIBTSAN_HEADERS)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $<
-%.o: %.cc Makefile.old $(LIBTSAN_HEADERS)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) $(NO_SYSROOT) -c $<
-%.o: $(INTERCEPTION)/%.cc Makefile.old $(LIBTSAN_HEADERS)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
-%.o: $(COMMON)/%.cc Makefile.old $(LIBTSAN_HEADERS)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) -c $< -o $@
-%.o: %.S
-	$(CXX) $(INCLUDES) -o $@ -c $<
-
-libtsan.a: $(LIBTSAN_OBJ)
-	ar ru $@ $(LIBTSAN_OBJ)
-
-libtsan_dummy.a: tsan_dummy_rtl.o
-	ar ru $@ $<
-
-clean:
-	rm -f *.o *.a
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index 59e3de4..1e2050d 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -90,8 +90,6 @@
 
 namespace __tsan {
 
-const unsigned kInvalidTid = (unsigned)-1;
-
 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
     : tid_(tid)
     , reused_(reused + 1) {  // 0 has special meaning
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index d869d95..9c7b329 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -38,13 +38,10 @@
 const bool kGoMode = true;
 const bool kCppMode = false;
 const char *const kTsanOptionsEnv = "GORACE";
-// Go linker does not support weak symbols.
-#define CPP_WEAK
 #else
 const bool kGoMode = false;
 const bool kCppMode = true;
 const char *const kTsanOptionsEnv = "TSAN_OPTIONS";
-#define CPP_WEAK WEAK
 #endif
 
 const int kTidBits = 13;
@@ -83,6 +80,8 @@
 const bool kCollectHistory = true;
 #endif
 
+const unsigned kInvalidTid = (unsigned)-1;
+
 // The following "build consistency" machinery ensures that all source files
 // are built in the same configuration. Inconsistent builds lead to
 // hard to debug crashes.
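
Note on this hunk: kInvalidTid moves out of tsan_clock.cc (see the deletion above) into tsan_defs.h so other translation units can share the same sentinel. A compile-time check one could add (illustration only, not part of the patch; assumes the usual 32-bit unsigned):

    // (unsigned)-1 wraps to the maximum unsigned value, so the sentinel can
    // never collide with a real thread id (tids occupy kTidBits == 13 bits).
    static_assert(kInvalidTid == 0xFFFFFFFFu, "kInvalidTid is UINT_MAX");
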
diff --git a/lib/tsan/rtl/tsan_dense_alloc.h b/lib/tsan/rtl/tsan_dense_alloc.h
index a1cf84b..e9815c9 100644
--- a/lib/tsan/rtl/tsan_dense_alloc.h
+++ b/lib/tsan/rtl/tsan_dense_alloc.h
@@ -108,7 +108,7 @@
       // Reserve 0 as invalid index.
       IndexT start = fillpos_ == 0 ? 1 : 0;
       for (IndexT i = start; i < kL2Size; i++) {
-        new(batch + i) T();
+        new(batch + i) T;
         *(IndexT*)(batch + i) = i + 1 + fillpos_ * kL2Size;
       }
       *(IndexT*)(batch + kL2Size - 1) = 0;
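
The change from `new(batch + i) T()` to `new(batch + i) T` swaps value-initialization for default-initialization: for a type without a user-provided constructor, the parenthesized form zero-initializes the object first, which is wasted work here because the very next statement overwrites the slot with a free-list index. A minimal sketch of the distinction, using a hypothetical POD type (not from the patch):

    #include <new>

    struct Slot { unsigned next; };  // trivially constructible, like the allocator's T

    void fill(char *raw) {
      Slot *a = new (raw) Slot();               // value-init: a->next == 0
      Slot *b = new (raw + sizeof(Slot)) Slot;  // default-init: b->next indeterminate
      b->next = 42;  // the caller overwrites immediately, so zeroing was unneeded
      (void)a;
    }
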
diff --git a/lib/tsan/rtl/tsan_fd.cc b/lib/tsan/rtl/tsan_fd.cc
index d18502f..d84df4a 100644
--- a/lib/tsan/rtl/tsan_fd.cc
+++ b/lib/tsan/rtl/tsan_fd.cc
@@ -91,7 +91,8 @@
 }
 
 // pd must be already ref'ed.
-static void init(ThreadState *thr, uptr pc, int fd, FdSync *s) {
+static void init(ThreadState *thr, uptr pc, int fd, FdSync *s,
+    bool write = true) {
   FdDesc *d = fddesc(thr, pc, fd);
   // As a matter of fact, we don't intercept all close calls.
   // See e.g. libc __res_iclose().
@@ -109,8 +110,13 @@
   }
   d->creation_tid = thr->tid;
   d->creation_stack = CurrentStackId(thr, pc);
-  // To catch races between fd usage and open.
-  MemoryRangeImitateWrite(thr, pc, (uptr)d, 8);
+  if (write) {
+    // To catch races between fd usage and open.
+    MemoryRangeImitateWrite(thr, pc, (uptr)d, 8);
+  } else {
+    // See the dup-related comment in FdClose.
+    MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+  }
 }
 
 void FdInit() {
@@ -181,13 +187,25 @@
   MemoryRead(thr, pc, (uptr)d, kSizeLog8);
 }
 
-void FdClose(ThreadState *thr, uptr pc, int fd) {
+void FdClose(ThreadState *thr, uptr pc, int fd, bool write) {
   DPrintf("#%d: FdClose(%d)\n", thr->tid, fd);
   if (bogusfd(fd))
     return;
   FdDesc *d = fddesc(thr, pc, fd);
-  // To catch races between fd usage and close.
-  MemoryWrite(thr, pc, (uptr)d, kSizeLog8);
+  if (write) {
+    // To catch races between fd usage and close.
+    MemoryWrite(thr, pc, (uptr)d, kSizeLog8);
+  } else {
+    // This path is used only by dup2/dup3 calls.
+    // We do read instead of write because there are a number of legitimate
+    // cases where write would lead to false positives:
+    // 1. Some software dups a closed pipe in place of a socket before closing
+    //    the socket (to prevent races actually).
+    // 2. Some daemons dup /dev/null in place of stdin/stdout.
+    // On the other hand, we have not seen cases where write here catches real
+    // bugs.
+    MemoryRead(thr, pc, (uptr)d, kSizeLog8);
+  }
   // We need to clear it, because if we do not intercept any call out there
   // that creates an fd, we will hit false positives.
   MemoryResetRange(thr, pc, (uptr)d, 8);
@@ -204,15 +222,15 @@
   init(thr, pc, fd, &fdctx.filesync);
 }
 
-void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd) {
+void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd, bool write) {
   DPrintf("#%d: FdDup(%d, %d)\n", thr->tid, oldfd, newfd);
   if (bogusfd(oldfd) || bogusfd(newfd))
     return;
   // Ignore the case when user dups not yet connected socket.
   FdDesc *od = fddesc(thr, pc, oldfd);
   MemoryRead(thr, pc, (uptr)od, kSizeLog8);
-  FdClose(thr, pc, newfd);
-  init(thr, pc, newfd, ref(od->sync));
+  FdClose(thr, pc, newfd, write);
+  init(thr, pc, newfd, ref(od->sync), write);
 }
 
 void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd) {
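
For context, a sketch of the daemon idiom that motivates the `write = false` path in FdClose/FdDup above (hypothetical user code, not part of the patch): dup2-ing /dev/null over the standard descriptors is legitimate and should not be reported as racing with unrelated uses of those descriptors.

    #include <fcntl.h>
    #include <unistd.h>

    void redirect_stdio_to_devnull() {
      int null = open("/dev/null", O_RDWR);
      if (null < 0) return;
      // Previously each dup2 imitated a write on the shadow of fds 0-2;
      // with this patch, dup2/dup3 only perform a read of the shadow.
      dup2(null, STDIN_FILENO);
      dup2(null, STDOUT_FILENO);
      dup2(null, STDERR_FILENO);
      if (null > STDERR_FILENO) close(null);
    }
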
diff --git a/lib/tsan/rtl/tsan_fd.h b/lib/tsan/rtl/tsan_fd.h
index 75c616d..64dc84f 100644
--- a/lib/tsan/rtl/tsan_fd.h
+++ b/lib/tsan/rtl/tsan_fd.h
@@ -42,9 +42,9 @@
 void FdAcquire(ThreadState *thr, uptr pc, int fd);
 void FdRelease(ThreadState *thr, uptr pc, int fd);
 void FdAccess(ThreadState *thr, uptr pc, int fd);
-void FdClose(ThreadState *thr, uptr pc, int fd);
+void FdClose(ThreadState *thr, uptr pc, int fd, bool write = true);
 void FdFileCreate(ThreadState *thr, uptr pc, int fd);
-void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd);
+void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd, bool write);
 void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd);
 void FdEventCreate(ThreadState *thr, uptr pc, int fd);
 void FdSignalCreate(ThreadState *thr, uptr pc, int fd);
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index 5de227a..7615231 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -29,8 +29,8 @@
 #ifdef TSAN_EXTERNAL_HOOKS
 extern "C" const char* __tsan_default_options();
 #else
-extern "C" SANITIZER_INTERFACE_ATTRIBUTE
-const char *WEAK __tsan_default_options() {
+SANITIZER_WEAK_DEFAULT_IMPL
+const char *__tsan_default_options() {
   return "";
 }
 #endif
@@ -61,11 +61,16 @@
     CommonFlags cf;
     cf.CopyFrom(*common_flags());
     cf.allow_addr2line = true;
-#ifndef SANITIZER_GO
-    cf.detect_deadlocks = true;
-#endif
+    if (kGoMode) {
+      // Does not work as expected for Go: runtime handles SIGABRT and crashes.
+      cf.abort_on_error = false;
+      // Go does not have mutexes.
+    } else {
+      cf.detect_deadlocks = true;
+    }
     cf.print_suppressions = false;
     cf.stack_trace_format = "    #%n %f %S %M";
+    cf.exitcode = 66;
     OverrideCommonFlags(cf);
   }
 
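SANITIZER_WEAK_DEFAULT_IMPL only changes how the fallback is spelled; a strong definition in the application still overrides it. A minimal sketch of such an override (user code, not part of the patch; the flag values are examples):

    // Linked into the application, not the runtime:
    extern "C" const char *__tsan_default_options() {
      return "halt_on_error=1:history_size=5";
    }
    // Values in the TSAN_OPTIONS environment variable are parsed afterwards
    // and take precedence over these defaults.
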
diff --git a/lib/tsan/rtl/tsan_flags.inc b/lib/tsan/rtl/tsan_flags.inc
index e499468..ab9ca99 100644
--- a/lib/tsan/rtl/tsan_flags.inc
+++ b/lib/tsan/rtl/tsan_flags.inc
@@ -45,7 +45,6 @@
     "If set, all atomics are effectively sequentially consistent (seq_cst), "
     "regardless of what user actually specified.")
 TSAN_FLAG(bool, print_benign, false, "Print matched \"benign\" races at exit.")
-TSAN_FLAG(int, exitcode, 66, "Override exit status if something was reported.")
 TSAN_FLAG(bool, halt_on_error, false, "Exit after first reported error.")
 TSAN_FLAG(int, atexit_sleep_ms, 1000,
           "Sleep in main thread before exiting for that many ms "
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index ca3be64..62c96cb 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -28,16 +28,42 @@
 #include "tsan_mman.h"
 #include "tsan_fd.h"
 
+#if SANITIZER_POSIX
+#include "sanitizer_common/sanitizer_posix.h"
+#endif
+
 using namespace __tsan;  // NOLINT
 
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_MAC
 #define __errno_location __error
-#define __libc_realloc __realloc
-#define __libc_calloc __calloc
 #define stdout __stdoutp
 #define stderr __stderrp
 #endif
 
+#if SANITIZER_FREEBSD
+#define __libc_realloc __realloc
+#define __libc_calloc __calloc
+#elif SANITIZER_MAC
+#define __libc_malloc REAL(malloc)
+#define __libc_realloc REAL(realloc)
+#define __libc_calloc REAL(calloc)
+#define __libc_free REAL(free)
+#elif SANITIZER_ANDROID
+#define __errno_location __errno
+#define __libc_malloc REAL(malloc)
+#define __libc_realloc REAL(realloc)
+#define __libc_calloc REAL(calloc)
+#define __libc_free REAL(free)
+#define mallopt(a, b)
+#endif
+
+#if SANITIZER_LINUX || SANITIZER_FREEBSD
+#define PTHREAD_CREATE_DETACHED 1
+#elif SANITIZER_MAC
+#define PTHREAD_CREATE_DETACHED 2
+#endif
+
+
 #ifdef __mips__
 const int kSigCount = 129;
 #else
@@ -60,6 +86,14 @@
 };
 #endif
 
+#if defined(__x86_64__) || defined(__mips__) \
+  || (defined(__powerpc64__) && defined(__BIG_ENDIAN__))
+#define PTHREAD_ABI_BASE  "GLIBC_2.3.2"
+#elif defined(__aarch64__) || (defined(__powerpc64__) \
+  && defined(__LITTLE_ENDIAN__))
+#define PTHREAD_ABI_BASE  "GLIBC_2.17"
+#endif
+
 extern "C" int pthread_attr_init(void *attr);
 extern "C" int pthread_attr_destroy(void *attr);
 DECLARE_REAL(int, pthread_attr_getdetachstate, void *, void *)
@@ -67,20 +101,23 @@
 extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
 extern "C" int pthread_setspecific(unsigned key, const void *v);
 DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
-extern "C" int pthread_yield();
 extern "C" int pthread_sigmask(int how, const __sanitizer_sigset_t *set,
                                __sanitizer_sigset_t *oldset);
 // REAL(sigfillset) defined in common interceptors.
 DECLARE_REAL(int, sigfillset, __sanitizer_sigset_t *set)
 DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
+DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
+DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
 extern "C" void *pthread_self();
 extern "C" void _exit(int status);
 extern "C" int *__errno_location();
 extern "C" int fileno_unlocked(void *stream);
+#if !SANITIZER_ANDROID
 extern "C" void *__libc_calloc(uptr size, uptr n);
 extern "C" void *__libc_realloc(void *ptr, uptr size);
+#endif
 extern "C" int dirfd(void *dirp);
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_ANDROID
 extern "C" int mallopt(int param, int value);
 #endif
 extern __sanitizer_FILE *stdout, *stderr;
@@ -89,14 +126,16 @@
 const int EINVAL = 22;
 const int EBUSY = 16;
 const int EOWNERDEAD = 130;
+#if !SANITIZER_MAC
 const int EPOLL_CTL_ADD = 1;
+#endif
 const int SIGILL = 4;
 const int SIGABRT = 6;
 const int SIGFPE = 8;
 const int SIGSEGV = 11;
 const int SIGPIPE = 13;
 const int SIGTERM = 15;
-#ifdef __mips__
+#if defined(__mips__) || SANITIZER_MAC
 const int SIGBUS = 10;
 const int SIGSYS = 12;
 #else
@@ -104,7 +143,9 @@
 const int SIGSYS = 31;
 #endif
 void *const MAP_FAILED = (void*)-1;
+#if !SANITIZER_MAC
 const int PTHREAD_BARRIER_SERIAL_THREAD = -1;
+#endif
 const int MAP_FIXED = 0x10;
 typedef long long_t;  // NOLINT
 
@@ -119,6 +160,17 @@
 typedef void (*sighandler_t)(int sig);
 typedef void (*sigactionhandler_t)(int sig, my_siginfo_t *siginfo, void *uctx);
 
+#if SANITIZER_ANDROID
+struct sigaction_t {
+  u32 sa_flags;
+  union {
+    sighandler_t sa_handler;
+    sigactionhandler_t sa_sigaction;
+  };
+  __sanitizer_sigset_t sa_mask;
+  void (*sa_restorer)();
+};
+#else
 struct sigaction_t {
 #ifdef __mips__
   u32 sa_flags;
@@ -130,6 +182,9 @@
 #if SANITIZER_FREEBSD
   int sa_flags;
   __sanitizer_sigset_t sa_mask;
+#elif SANITIZER_MAC
+  __sanitizer_sigset_t sa_mask;
+  int sa_flags;
 #else
   __sanitizer_sigset_t sa_mask;
 #ifndef __mips__
@@ -138,11 +193,12 @@
   void (*sa_restorer)();
 #endif
 };
+#endif
 
 const sighandler_t SIG_DFL = (sighandler_t)0;
 const sighandler_t SIG_IGN = (sighandler_t)1;
 const sighandler_t SIG_ERR = (sighandler_t)-1;
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_MAC
 const int SA_SIGINFO = 0x40;
 const int SIG_SETMASK = 3;
 #elif defined(__mips__)
@@ -171,6 +227,9 @@
   atomic_uintptr_t in_blocking_func;
   atomic_uintptr_t have_pending_signals;
   SignalDesc pending_signals[kSigCount];
+  // emptyset and oldset are too big for the stack.
+  __sanitizer_sigset_t emptyset;
+  __sanitizer_sigset_t oldset;
 };
 
 // The object is 64-byte aligned, because we want hot data to be located in
@@ -203,7 +262,9 @@
   return ctx;
 }
 
+#if !SANITIZER_MAC
 static unsigned g_thread_finalize_key;
+#endif
 
 ScopedInterceptor::ScopedInterceptor(ThreadState *thr, const char *fname,
                                      uptr pc)
@@ -234,17 +295,20 @@
   }
 }
 
-#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
-    SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
-    if (REAL(func) == 0) { \
-      Report("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
-      Die(); \
-    }                                                    \
-    if (thr->ignore_interceptors || thr->in_ignored_lib) \
-      return REAL(func)(__VA_ARGS__); \
-/**/
+void ScopedInterceptor::UserCallbackStart() {
+  if (in_ignored_lib_) {
+    thr_->in_ignored_lib = false;
+    ThreadIgnoreEnd(thr_, pc_);
+  }
+}
 
-#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
+void ScopedInterceptor::UserCallbackEnd() {
+  if (in_ignored_lib_) {
+    thr_->in_ignored_lib = true;
+    ThreadIgnoreBegin(thr_, pc_);
+  }
+}
+
 #define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
 #if SANITIZER_FREEBSD
 # define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
@@ -329,6 +393,7 @@
 static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
       void *arg, void *dso);
 
+#if !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
   if (cur_thread()->in_symbolizer)
     return 0;
@@ -337,6 +402,7 @@
   SCOPED_INTERCEPTOR_RAW(atexit, f);
   return setup_at_exit_wrapper(thr, pc, (void(*)())f, 0, 0);
 }
+#endif
 
 TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) {
   if (cur_thread()->in_symbolizer)
@@ -359,6 +425,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 static void on_exit_wrapper(int status, void *arg) {
   ThreadState *thr = cur_thread();
   uptr pc = 0;
@@ -383,6 +450,7 @@
   ThreadIgnoreEnd(thr, pc);
   return res;
 }
+#endif
 
 // Cleanup old bufs.
 static void JmpBufGarbageCollect(ThreadState *thr, uptr sp) {
@@ -390,7 +458,7 @@
     JmpBuf *buf = &thr->jmp_bufs[i];
     if (buf->sp <= sp) {
       uptr sz = thr->jmp_bufs.Size();
-      thr->jmp_bufs[i] = thr->jmp_bufs[sz - 1];
+      internal_memcpy(buf, &thr->jmp_bufs[sz - 1], sizeof(*buf));
       thr->jmp_bufs.PopBack();
       i--;
     }
@@ -417,11 +485,17 @@
 }
 
 static void LongJmp(ThreadState *thr, uptr *env) {
-#if SANITIZER_FREEBSD
+#ifdef __powerpc__
+  uptr mangled_sp = env[0];
+#elif SANITIZER_FREEBSD || SANITIZER_MAC
   uptr mangled_sp = env[2];
-#else
+#elif defined(SANITIZER_LINUX)
+# ifdef __aarch64__
+  uptr mangled_sp = env[13];
+# else
   uptr mangled_sp = env[6];
-#endif  // SANITIZER_FREEBSD
+# endif
+#endif
   // Find the saved buf by mangled_sp.
   for (uptr i = 0; i < thr->jmp_bufs.Size(); i++) {
     JmpBuf *buf = &thr->jmp_bufs[i];
@@ -451,6 +525,11 @@
   SetJmp(cur_thread(), sp, mangled_sp);
 }
 
+#if SANITIZER_MAC
+TSAN_INTERCEPTOR(int, setjmp, void *env);
+TSAN_INTERCEPTOR(int, _setjmp, void *env);
+TSAN_INTERCEPTOR(int, sigsetjmp, void *env);
+#else  // SANITIZER_MAC
 // Not called.  Merely to satisfy TSAN_INTERCEPT().
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE
 int __interceptor_setjmp(void *env);
@@ -489,6 +568,7 @@
 DEFINE_REAL(int, _setjmp, void *env)
 DEFINE_REAL(int, sigsetjmp, void *env)
 DEFINE_REAL(int, __sigsetjmp, void *env)
+#endif  // SANITIZER_MAC
 
 TSAN_INTERCEPTOR(void, longjmp, uptr *env, int val) {
   {
@@ -506,6 +586,7 @@
   REAL(siglongjmp)(env, val);
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(void*, malloc, uptr size) {
   if (cur_thread()->in_symbolizer)
     return __libc_malloc(size);
@@ -572,6 +653,7 @@
   SCOPED_INTERCEPTOR_RAW(malloc_usable_size, p);
   return user_alloc_usable_size(p);
 }
+#endif
 
 TSAN_INTERCEPTOR(uptr, strlen, const char *s) {
   SCOPED_TSAN_INTERCEPTOR(strlen, s);
@@ -596,27 +678,18 @@
     MemoryAccessRange(thr, pc, (uptr)dst, size, true);
     MemoryAccessRange(thr, pc, (uptr)src, size, false);
   }
-  return internal_memcpy(dst, src, size);
-}
-
-TSAN_INTERCEPTOR(int, memcmp, const void *s1, const void *s2, uptr n) {
-  SCOPED_TSAN_INTERCEPTOR(memcmp, s1, s2, n);
-  int res = 0;
-  uptr len = 0;
-  for (; len < n; len++) {
-    if ((res = ((const unsigned char *)s1)[len] -
-               ((const unsigned char *)s2)[len]))
-      break;
-  }
-  MemoryAccessRange(thr, pc, (uptr)s1, len < n ? len + 1 : n, false);
-  MemoryAccessRange(thr, pc, (uptr)s2, len < n ? len + 1 : n, false);
-  return res;
+  // On OS X, calling internal_memcpy here will cause memory corruption,
+  // because memcpy and memmove are actually aliases of the same implementation.
+  // We need to use internal_memmove here.
+  return internal_memmove(dst, src, size);
 }
 
 TSAN_INTERCEPTOR(void*, memmove, void *dst, void *src, uptr n) {
-  SCOPED_TSAN_INTERCEPTOR(memmove, dst, src, n);
-  MemoryAccessRange(thr, pc, (uptr)dst, n, true);
-  MemoryAccessRange(thr, pc, (uptr)src, n, false);
+  if (!COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) {
+    SCOPED_TSAN_INTERCEPTOR(memmove, dst, src, n);
+    MemoryAccessRange(thr, pc, (uptr)dst, n, true);
+    MemoryAccessRange(thr, pc, (uptr)src, n, false);
+  }
   return REAL(memmove)(dst, src, n);
 }
 
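A self-contained illustration of the overlap hazard behind the internal_memmove comment in the memcpy interceptor above (not from the patch): because OS X may alias memcpy and memmove to one implementation, the interceptor can be reached with overlapping ranges, which only memmove is defined to handle.

    #include <cstdio>
    #include <cstring>

    int main() {
      char buf[] = "abcdef";
      std::memmove(buf + 1, buf, 5);     // overlap is well-defined: "aabcde"
      std::printf("%s\n", buf);
      // std::memcpy(buf + 1, buf, 5);   // undefined behavior for overlap
    }
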
@@ -629,6 +702,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(char*, strchrnul, char *s, int c) {
   SCOPED_TSAN_INTERCEPTOR(strchrnul, s, c);
   char *res = REAL(strchrnul)(s, c);
@@ -636,6 +710,7 @@
   READ_STRING(thr, pc, s, len);
   return res;
 }
+#endif
 
 TSAN_INTERCEPTOR(char*, strrchr, char *s, int c) {
   SCOPED_TSAN_INTERCEPTOR(strrchr, s, c);
@@ -679,8 +754,8 @@
   return true;
 }
 
-TSAN_INTERCEPTOR(void*, mmap, void *addr, long_t sz, int prot,
-                         int flags, int fd, unsigned off) {
+TSAN_INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags,
+                 int fd, OFF_T off) {
   SCOPED_TSAN_INTERCEPTOR(mmap, addr, sz, prot, flags, fd, off);
   if (!fix_mmap_addr(&addr, sz, flags))
     return MAP_FAILED;
@@ -693,9 +768,9 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
-TSAN_INTERCEPTOR(void*, mmap64, void *addr, long_t sz, int prot,
-                           int flags, int fd, u64 off) {
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags,
+                 int fd, OFF64_T off) {
   SCOPED_TSAN_INTERCEPTOR(mmap64, addr, sz, prot, flags, fd, off);
   if (!fix_mmap_addr(&addr, sz, flags))
     return MAP_FAILED;
@@ -723,7 +798,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(void*, memalign, uptr align, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(memalign, align, sz);
   return user_alloc(thr, pc, sz, align);
@@ -733,6 +808,7 @@
 #define TSAN_MAYBE_INTERCEPT_MEMALIGN
 #endif
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(void*, aligned_alloc, uptr align, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(memalign, align, sz);
   return user_alloc(thr, pc, sz, align);
@@ -742,8 +818,9 @@
   SCOPED_INTERCEPTOR_RAW(valloc, sz);
   return user_alloc(thr, pc, sz, GetPageSizeCached());
 }
+#endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(pvalloc, sz);
   sz = RoundUp(sz, GetPageSizeCached());
@@ -754,14 +831,33 @@
 #define TSAN_MAYBE_INTERCEPT_PVALLOC
 #endif
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(posix_memalign, memptr, align, sz);
   *memptr = user_alloc(thr, pc, sz, align);
   return 0;
 }
+#endif
+
+// __cxa_guard_acquire and friends need to be intercepted in a special way -
+// regular interceptors will break statically-linked libstdc++. Linux
+// interceptors are specially defined as weak functions (so that they don't
+// cause link errors when the user defines them as well). They silently
+// auto-disable themselves when such a symbol is already present in the
+// binary. If we link libstdc++ statically, it will bring its own
+// __cxa_guard_acquire, which will silently replace our interceptor. That's
+// why on Linux we simply export these interceptors with INTERFACE_ATTRIBUTE.
+// On OS X, we don't support static linking, so we just use a regular
+// interceptor.
+#if SANITIZER_MAC
+#define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR
+#else
+#define STDCXX_INTERCEPTOR(rettype, name, ...) \
+  extern "C" rettype INTERFACE_ATTRIBUTE name(__VA_ARGS__)
+#endif
 
 // Used in thread-safe function static initialization.
-extern "C" int INTERFACE_ATTRIBUTE __cxa_guard_acquire(atomic_uint32_t *g) {
+STDCXX_INTERCEPTOR(int, __cxa_guard_acquire, atomic_uint32_t *g) {
   SCOPED_INTERCEPTOR_RAW(__cxa_guard_acquire, g);
   for (;;) {
     u32 cmp = atomic_load(g, memory_order_acquire);
@@ -777,17 +873,31 @@
   }
 }
 
-extern "C" void INTERFACE_ATTRIBUTE __cxa_guard_release(atomic_uint32_t *g) {
+STDCXX_INTERCEPTOR(void, __cxa_guard_release, atomic_uint32_t *g) {
   SCOPED_INTERCEPTOR_RAW(__cxa_guard_release, g);
   Release(thr, pc, (uptr)g);
   atomic_store(g, 1, memory_order_release);
 }
 
-extern "C" void INTERFACE_ATTRIBUTE __cxa_guard_abort(atomic_uint32_t *g) {
+STDCXX_INTERCEPTOR(void, __cxa_guard_abort, atomic_uint32_t *g) {
   SCOPED_INTERCEPTOR_RAW(__cxa_guard_abort, g);
   atomic_store(g, 0, memory_order_relaxed);
 }
 
+namespace __tsan {
+void DestroyThreadState() {
+  ThreadState *thr = cur_thread();
+  ThreadFinish(thr);
+  ThreadSignalContext *sctx = thr->signal_ctx;
+  if (sctx) {
+    thr->signal_ctx = 0;
+    UnmapOrDie(sctx, sizeof(*sctx));
+  }
+  cur_thread_finalize();
+}
+}  // namespace __tsan
+
+#if !SANITIZER_MAC
 static void thread_finalize(void *v) {
   uptr iter = (uptr)v;
   if (iter > 1) {
@@ -797,16 +907,9 @@
     }
     return;
   }
-  {
-    ThreadState *thr = cur_thread();
-    ThreadFinish(thr);
-    ThreadSignalContext *sctx = thr->signal_ctx;
-    if (sctx) {
-      thr->signal_ctx = 0;
-      UnmapOrDie(sctx, sizeof(*sctx));
-    }
-  }
+  DestroyThreadState();
 }
+#endif
 
 
 struct ThreadParam {
@@ -824,15 +927,17 @@
     ThreadState *thr = cur_thread();
     // Thread-local state is not initialized yet.
     ScopedIgnoreInterceptors ignore;
+#if !SANITIZER_MAC
     ThreadIgnoreBegin(thr, 0);
     if (pthread_setspecific(g_thread_finalize_key,
-                            (void *)kPthreadDestructorIterations)) {
+                            (void *)GetPthreadDestructorIterations())) {
       Printf("ThreadSanitizer: failed to set thread key\n");
       Die();
     }
     ThreadIgnoreEnd(thr, 0);
+#endif
     while ((tid = atomic_load(&p->tid, memory_order_acquire)) == 0)
-      pthread_yield();
+      internal_sched_yield();
     ThreadStart(thr, tid, GetTid());
     atomic_store(&p->tid, 0, memory_order_release);
   }
@@ -880,7 +985,8 @@
     ThreadIgnoreEnd(thr, pc);
   }
   if (res == 0) {
-    int tid = ThreadCreate(thr, pc, *(uptr*)th, detached);
+    int tid = ThreadCreate(thr, pc, *(uptr*)th,
+                           detached == PTHREAD_CREATE_DETACHED);
     CHECK_NE(tid, 0);
     // Synchronization on p.tid serves two purposes:
     // 1. ThreadCreate must finish before the new thread starts.
@@ -891,7 +997,7 @@
     //    before the new thread got a chance to acquire from it in ThreadStart.
     atomic_store(&p.tid, tid, memory_order_release);
     while (atomic_load(&p.tid, memory_order_acquire) != 0)
-      pthread_yield();
+      internal_sched_yield();
   }
   if (attr == &myattr)
     pthread_attr_destroy(&myattr);
@@ -992,8 +1098,8 @@
 INTERCEPTOR(int, pthread_cond_wait, void *c, void *m) {
   void *cond = init_cond(c);
   SCOPED_TSAN_INTERCEPTOR(pthread_cond_wait, cond, m);
-  MutexUnlock(thr, pc, (uptr)m);
   MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), false);
+  MutexUnlock(thr, pc, (uptr)m);
   CondMutexUnlockCtx arg = {&si, thr, pc, m};
   int res = 0;
   // This ensures that we handle mutex lock even in case of pthread_cancel.
@@ -1014,8 +1120,8 @@
 INTERCEPTOR(int, pthread_cond_timedwait, void *c, void *m, void *abstime) {
   void *cond = init_cond(c);
   SCOPED_TSAN_INTERCEPTOR(pthread_cond_timedwait, cond, m, abstime);
-  MutexUnlock(thr, pc, (uptr)m);
   MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), false);
+  MutexUnlock(thr, pc, (uptr)m);
   CondMutexUnlockCtx arg = {&si, thr, pc, m};
   int res = 0;
   // This ensures that we handle mutex lock even in case of pthread_cancel.
@@ -1094,6 +1200,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) {
   SCOPED_TSAN_INTERCEPTOR(pthread_mutex_timedlock, m, abstime);
   int res = REAL(pthread_mutex_timedlock)(m, abstime);
@@ -1102,7 +1209,9 @@
   }
   return res;
 }
+#endif
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, pthread_spin_init, void *m, int pshared) {
   SCOPED_TSAN_INTERCEPTOR(pthread_spin_init, m, pshared);
   int res = REAL(pthread_spin_init)(m, pshared);
@@ -1145,6 +1254,7 @@
   int res = REAL(pthread_spin_unlock)(m);
   return res;
 }
+#endif
 
 TSAN_INTERCEPTOR(int, pthread_rwlock_init, void *m, void *a) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_init, m, a);
@@ -1182,6 +1292,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, pthread_rwlock_timedrdlock, void *m, void *abstime) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedrdlock, m, abstime);
   int res = REAL(pthread_rwlock_timedrdlock)(m, abstime);
@@ -1190,6 +1301,7 @@
   }
   return res;
 }
+#endif
 
 TSAN_INTERCEPTOR(int, pthread_rwlock_wrlock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_wrlock, m);
@@ -1209,6 +1321,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, pthread_rwlock_timedwrlock, void *m, void *abstime) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedwrlock, m, abstime);
   int res = REAL(pthread_rwlock_timedwrlock)(m, abstime);
@@ -1217,6 +1330,7 @@
   }
   return res;
 }
+#endif
 
 TSAN_INTERCEPTOR(int, pthread_rwlock_unlock, void *m) {
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_unlock, m);
@@ -1225,6 +1339,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) {
   SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count);
   MemoryWrite(thr, pc, (uptr)b, kSizeLog1);
@@ -1250,12 +1365,17 @@
   }
   return res;
 }
+#endif
 
 TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
   SCOPED_INTERCEPTOR_RAW(pthread_once, o, f);
   if (o == 0 || f == 0)
     return EINVAL;
-  atomic_uint32_t *a = static_cast<atomic_uint32_t*>(o);
+  atomic_uint32_t *a;
+  if (!SANITIZER_MAC)
+    a = static_cast<atomic_uint32_t*>(o);
+  else  // On OS X, pthread_once_t has a header with a long-sized signature.
+    a = static_cast<atomic_uint32_t*>((void *)((char *)o + sizeof(long_t)));
   u32 v = atomic_load(a, memory_order_acquire);
   if (v == 0 && atomic_compare_exchange_strong(a, &v, 1,
                                                memory_order_relaxed)) {
@@ -1265,7 +1385,7 @@
     atomic_store(a, 2, memory_order_release);
   } else {
     while (v != 2) {
-      pthread_yield();
+      internal_sched_yield();
       v = atomic_load(a, memory_order_acquire);
     }
     if (!thr->in_ignored_lib)
@@ -1274,62 +1394,7 @@
   return 0;
 }
 
-TSAN_INTERCEPTOR(int, sem_init, void *s, int pshared, unsigned value) {
-  SCOPED_TSAN_INTERCEPTOR(sem_init, s, pshared, value);
-  int res = REAL(sem_init)(s, pshared, value);
-  return res;
-}
-
-TSAN_INTERCEPTOR(int, sem_destroy, void *s) {
-  SCOPED_TSAN_INTERCEPTOR(sem_destroy, s);
-  int res = REAL(sem_destroy)(s);
-  return res;
-}
-
-TSAN_INTERCEPTOR(int, sem_wait, void *s) {
-  SCOPED_TSAN_INTERCEPTOR(sem_wait, s);
-  int res = BLOCK_REAL(sem_wait)(s);
-  if (res == 0) {
-    Acquire(thr, pc, (uptr)s);
-  }
-  return res;
-}
-
-TSAN_INTERCEPTOR(int, sem_trywait, void *s) {
-  SCOPED_TSAN_INTERCEPTOR(sem_trywait, s);
-  int res = BLOCK_REAL(sem_trywait)(s);
-  if (res == 0) {
-    Acquire(thr, pc, (uptr)s);
-  }
-  return res;
-}
-
-TSAN_INTERCEPTOR(int, sem_timedwait, void *s, void *abstime) {
-  SCOPED_TSAN_INTERCEPTOR(sem_timedwait, s, abstime);
-  int res = BLOCK_REAL(sem_timedwait)(s, abstime);
-  if (res == 0) {
-    Acquire(thr, pc, (uptr)s);
-  }
-  return res;
-}
-
-TSAN_INTERCEPTOR(int, sem_post, void *s) {
-  SCOPED_TSAN_INTERCEPTOR(sem_post, s);
-  Release(thr, pc, (uptr)s);
-  int res = REAL(sem_post)(s);
-  return res;
-}
-
-TSAN_INTERCEPTOR(int, sem_getvalue, void *s, int *sval) {
-  SCOPED_TSAN_INTERCEPTOR(sem_getvalue, s, sval);
-  int res = REAL(sem_getvalue)(s, sval);
-  if (res == 0) {
-    Acquire(thr, pc, (uptr)s);
-  }
-  return res;
-}
-
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __xstat, int version, const char *path, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__xstat, version, path, buf);
   READ_STRING(thr, pc, path, 0);
@@ -1341,7 +1406,7 @@
 #endif
 
 TSAN_INTERCEPTOR(int, stat, const char *path, void *buf) {
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID
   SCOPED_TSAN_INTERCEPTOR(stat, path, buf);
   READ_STRING(thr, pc, path, 0);
   return REAL(stat)(path, buf);
@@ -1352,7 +1417,7 @@
 #endif
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __xstat64, int version, const char *path, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__xstat64, version, path, buf);
   READ_STRING(thr, pc, path, 0);
@@ -1363,7 +1428,7 @@
 #define TSAN_MAYBE_INTERCEPT___XSTAT64
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, stat64, const char *path, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__xstat64, 0, path, buf);
   READ_STRING(thr, pc, path, 0);
@@ -1374,7 +1439,7 @@
 #define TSAN_MAYBE_INTERCEPT_STAT64
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __lxstat, int version, const char *path, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__lxstat, version, path, buf);
   READ_STRING(thr, pc, path, 0);
@@ -1386,7 +1451,7 @@
 #endif
 
 TSAN_INTERCEPTOR(int, lstat, const char *path, void *buf) {
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID
   SCOPED_TSAN_INTERCEPTOR(lstat, path, buf);
   READ_STRING(thr, pc, path, 0);
   return REAL(lstat)(path, buf);
@@ -1397,7 +1462,7 @@
 #endif
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __lxstat64, int version, const char *path, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__lxstat64, version, path, buf);
   READ_STRING(thr, pc, path, 0);
@@ -1408,7 +1473,7 @@
 #define TSAN_MAYBE_INTERCEPT___LXSTAT64
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, lstat64, const char *path, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__lxstat64, 0, path, buf);
   READ_STRING(thr, pc, path, 0);
@@ -1419,7 +1484,7 @@
 #define TSAN_MAYBE_INTERCEPT_LSTAT64
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf);
   if (fd > 0)
@@ -1432,7 +1497,7 @@
 #endif
 
 TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
-#if SANITIZER_FREEBSD
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID
   SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
   if (fd > 0)
     FdAccess(thr, pc, fd);
@@ -1445,7 +1510,7 @@
 #endif
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
   if (fd > 0)
@@ -1457,7 +1522,7 @@
 #define TSAN_MAYBE_INTERCEPT___FXSTAT64
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
   SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf);
   if (fd > 0)
@@ -1478,7 +1543,7 @@
   return fd;
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, open64, const char *name, int flags, int mode) {
   SCOPED_TSAN_INTERCEPTOR(open64, name, flags, mode);
   READ_STRING(thr, pc, name, 0);
@@ -1501,7 +1566,7 @@
   return fd;
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, creat64, const char *name, int mode) {
   SCOPED_TSAN_INTERCEPTOR(creat64, name, mode);
   READ_STRING(thr, pc, name, 0);
@@ -1519,7 +1584,7 @@
   SCOPED_TSAN_INTERCEPTOR(dup, oldfd);
   int newfd = REAL(dup)(oldfd);
   if (oldfd >= 0 && newfd >= 0 && newfd != oldfd)
-    FdDup(thr, pc, oldfd, newfd);
+    FdDup(thr, pc, oldfd, newfd, true);
   return newfd;
 }
 
@@ -1527,19 +1592,21 @@
   SCOPED_TSAN_INTERCEPTOR(dup2, oldfd, newfd);
   int newfd2 = REAL(dup2)(oldfd, newfd);
   if (oldfd >= 0 && newfd2 >= 0 && newfd2 != oldfd)
-    FdDup(thr, pc, oldfd, newfd2);
+    FdDup(thr, pc, oldfd, newfd2, false);
   return newfd2;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, dup3, int oldfd, int newfd, int flags) {
   SCOPED_TSAN_INTERCEPTOR(dup3, oldfd, newfd, flags);
   int newfd2 = REAL(dup3)(oldfd, newfd, flags);
   if (oldfd >= 0 && newfd2 >= 0 && newfd2 != oldfd)
-    FdDup(thr, pc, oldfd, newfd2);
+    FdDup(thr, pc, oldfd, newfd2, false);
   return newfd2;
 }
+#endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, eventfd, unsigned initval, int flags) {
   SCOPED_TSAN_INTERCEPTOR(eventfd, initval, flags);
   int fd = REAL(eventfd)(initval, flags);
@@ -1552,7 +1619,7 @@
 #define TSAN_MAYBE_INTERCEPT_EVENTFD
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, signalfd, int fd, void *mask, int flags) {
   SCOPED_TSAN_INTERCEPTOR(signalfd, fd, mask, flags);
   if (fd >= 0)
@@ -1567,7 +1634,7 @@
 #define TSAN_MAYBE_INTERCEPT_SIGNALFD
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, inotify_init, int fake) {
   SCOPED_TSAN_INTERCEPTOR(inotify_init, fake);
   int fd = REAL(inotify_init)(fake);
@@ -1580,7 +1647,7 @@
 #define TSAN_MAYBE_INTERCEPT_INOTIFY_INIT
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, inotify_init1, int flags) {
   SCOPED_TSAN_INTERCEPTOR(inotify_init1, flags);
   int fd = REAL(inotify_init1)(flags);
@@ -1634,7 +1701,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, epoll_create, int size) {
   SCOPED_TSAN_INTERCEPTOR(epoll_create, size);
   int fd = REAL(epoll_create)(size);
@@ -1647,7 +1714,7 @@
 #define TSAN_MAYBE_INTERCEPT_EPOLL_CREATE
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, epoll_create1, int flags) {
   SCOPED_TSAN_INTERCEPTOR(epoll_create1, flags);
   int fd = REAL(epoll_create1)(flags);
@@ -1667,7 +1734,7 @@
   return REAL(close)(fd);
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, __close, int fd) {
   SCOPED_TSAN_INTERCEPTOR(__close, fd);
   if (fd >= 0)
@@ -1680,7 +1747,7 @@
 #endif
 
 // glibc guts
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
 TSAN_INTERCEPTOR(void, __res_iclose, void *state, bool free_addr) {
   SCOPED_TSAN_INTERCEPTOR(__res_iclose, state, free_addr);
   int fds[64];
@@ -1704,6 +1771,7 @@
   return res;
 }
 
+#if !SANITIZER_MAC
 TSAN_INTERCEPTOR(int, pipe2, int *pipefd, int flags) {
   SCOPED_TSAN_INTERCEPTOR(pipe2, pipefd, flags);
   int res = REAL(pipe2)(pipefd, flags);
@@ -1711,6 +1779,7 @@
     FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
   return res;
 }
+#endif
 
 TSAN_INTERCEPTOR(long_t, send, int fd, void *buf, long_t len, int flags) {
   SCOPED_TSAN_INTERCEPTOR(send, fd, buf, len, flags);
@@ -1761,7 +1830,7 @@
   return res;
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(void*, tmpfile64, int fake) {
   SCOPED_TSAN_INTERCEPTOR(tmpfile64, fake);
   void *res = REAL(tmpfile64)(fake);
@@ -1828,7 +1897,7 @@
   return REAL(closedir)(dirp);
 }
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
   SCOPED_TSAN_INTERCEPTOR(epoll_ctl, epfd, op, fd, ev);
   if (epfd >= 0)
@@ -1845,7 +1914,7 @@
 #define TSAN_MAYBE_INTERCEPT_EPOLL_CTL
 #endif
 
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, epoll_wait, int epfd, void *ev, int cnt, int timeout) {
   SCOPED_TSAN_INTERCEPTOR(epoll_wait, epfd, ev, cnt, timeout);
   if (epfd >= 0)
@@ -1895,7 +1964,7 @@
     ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack);
     ThreadRegistryLock l(ctx->thread_registry);
     ScopedReport rep(ReportTypeErrnoInSignal);
-    if (!IsFiredSuppression(ctx, rep, stack)) {
+    if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
       rep.AddStack(stack, true);
       OutputReport(thr, rep);
     }
@@ -1910,10 +1979,8 @@
     return;
   atomic_store(&sctx->have_pending_signals, 0, memory_order_relaxed);
   atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
-  // These are too big for stack.
-  static THREADLOCAL __sanitizer_sigset_t emptyset, oldset;
-  CHECK_EQ(0, REAL(sigfillset)(&emptyset));
-  CHECK_EQ(0, pthread_sigmask(SIG_SETMASK, &emptyset, &oldset));
+  CHECK_EQ(0, REAL(sigfillset)(&sctx->emptyset));
+  CHECK_EQ(0, pthread_sigmask(SIG_SETMASK, &sctx->emptyset, &sctx->oldset));
   for (int sig = 0; sig < kSigCount; sig++) {
     SignalDesc *signal = &sctx->pending_signals[sig];
     if (signal->armed) {
@@ -1922,7 +1989,7 @@
           &signal->siginfo, &signal->ctx);
     }
   }
-  CHECK_EQ(0, pthread_sigmask(SIG_SETMASK, &oldset, 0));
+  CHECK_EQ(0, pthread_sigmask(SIG_SETMASK, &sctx->oldset, 0));
   atomic_fetch_add(&thr->in_signal_handler, -1, memory_order_relaxed);
 }
 
@@ -2011,7 +2078,7 @@
   sigactions[sig].sa_flags = *(volatile int*)&act->sa_flags;
   internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask,
       sizeof(sigactions[sig].sa_mask));
-#if !SANITIZER_FREEBSD
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC
   sigactions[sig].sa_restorer = act->sa_restorer;
 #endif
   sigaction_t newact;
@@ -2144,6 +2211,52 @@
   return WRAP(fork)(fake);
 }
 
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
+typedef int (*dl_iterate_phdr_cb_t)(__sanitizer_dl_phdr_info *info, SIZE_T size,
+                                    void *data);
+struct dl_iterate_phdr_data {
+  ThreadState *thr;
+  uptr pc;
+  dl_iterate_phdr_cb_t cb;
+  void *data;
+};
+
+static bool IsAppNotRodata(uptr addr) {
+  return IsAppMem(addr) && *(u64*)MemToShadow(addr) != kShadowRodata;
+}
+
+static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
+                              void *data) {
+  dl_iterate_phdr_data *cbdata = (dl_iterate_phdr_data *)data;
+  // dlopen/dlclose allocate/free dynamic-linker-internal memory, which is
+  // later accessible in the dl_iterate_phdr callback. But we don't see the
+  // synchronization inside the dynamic linker, so we "unpoison" it here to
+  // avoid producing false reports. Ignoring malloc/free in dlopen/dlclose
+  // is not enough, because some libc functions call __libc_dlopen.
+  if (info && IsAppNotRodata((uptr)info->dlpi_name))
+    MemoryResetRange(cbdata->thr, cbdata->pc, (uptr)info->dlpi_name,
+                     internal_strlen(info->dlpi_name));
+  int res = cbdata->cb(info, size, cbdata->data);
+  // Perform the check one more time in case info->dlpi_name was overwritten
+  // by user callback.
+  if (info && IsAppNotRodata((uptr)info->dlpi_name))
+    MemoryResetRange(cbdata->thr, cbdata->pc, (uptr)info->dlpi_name,
+                     internal_strlen(info->dlpi_name));
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, dl_iterate_phdr, dl_iterate_phdr_cb_t cb, void *data) {
+  SCOPED_TSAN_INTERCEPTOR(dl_iterate_phdr, cb, data);
+  dl_iterate_phdr_data cbdata;
+  cbdata.thr = thr;
+  cbdata.pc = pc;
+  cbdata.cb = cb;
+  cbdata.data = data;
+  int res = REAL(dl_iterate_phdr)(dl_iterate_phdr_cb, &cbdata);
+  return res;
+}
+#endif
+
 static int OnExit(ThreadState *thr) {
   int status = Finalize(thr);
   FlushStreams();
@@ -2156,6 +2269,7 @@
   const uptr pc;
 };
 
+#if !SANITIZER_MAC
 static void HandleRecvmsg(ThreadState *thr, uptr pc,
     __sanitizer_msghdr *msg) {
   int fds[64];
@@ -2163,6 +2277,7 @@
   for (int i = 0; i < cnt; i++)
     FdEventCreate(thr, pc, fds[i]);
 }
+#endif
 
 #include "sanitizer_common/sanitizer_platform_interceptors.h"
 // Causes interceptor recursion (getaddrinfo() and fopen())
@@ -2233,6 +2348,12 @@
 #define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \
   libignore()->OnLibraryUnloaded()
 
+#define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) \
+  Acquire(((TsanInterceptorContext *) ctx)->thr, pc, u)
+
+#define COMMON_INTERCEPTOR_RELEASE(ctx, u) \
+  Release(((TsanInterceptorContext *) ctx)->thr, pc, u)
+
 #define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \
   Acquire(((TsanInterceptorContext *) ctx)->thr, pc, Dir2addr(path))
 
@@ -2271,9 +2392,11 @@
   MutexRepair(((TsanInterceptorContext *)ctx)->thr, \
             ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
 
+#if !SANITIZER_MAC
 #define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) \
   HandleRecvmsg(((TsanInterceptorContext *)ctx)->thr, \
       ((TsanInterceptorContext *)ctx)->pc, msg)
+#endif
 
 #define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end)                           \
   if (TsanThread *t = GetCurrentThread()) {                                    \
@@ -2305,6 +2428,7 @@
   }
 };
 
+#if !SANITIZER_MAC
 static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) {
   TSAN_SYSCALL();
   MemoryAccessRange(thr, pc, p, s, write);
@@ -2358,6 +2482,7 @@
     ForkParentAfter(thr, pc);
   }
 }
+#endif
 
 #define COMMON_SYSCALL_PRE_READ_RANGE(p, s) \
   syscall_access_range(GET_CALLER_PC(), (uptr)(p), (uptr)(s), false)
@@ -2405,28 +2530,32 @@
   // Make sure the output is not lost.
   FlushStreams();
   if (status)
-    REAL(_exit)(status);
+    Die();
 }
 
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
 static void unreachable() {
   Report("FATAL: ThreadSanitizer: unreachable called\n");
   Die();
 }
+#endif
 
 void InitializeInterceptors() {
+#if !SANITIZER_MAC
   // We need to set it up early, because functions like dlsym() can call it.
   REAL(memset) = internal_memset;
   REAL(memcpy) = internal_memcpy;
-  REAL(memcmp) = internal_memcmp;
+#endif
 
   // Instruct libc malloc to consume less memory.
-#if !SANITIZER_FREEBSD
+#if SANITIZER_LINUX
   mallopt(1, 0);  // M_MXFAST
   mallopt(-3, 32*1024);  // M_MMAP_THRESHOLD
 #endif
 
   InitializeCommonInterceptors();
 
+#if !SANITIZER_MAC
   // We can not use TSAN_INTERCEPT to get setjmp addr,
   // because it does &setjmp and setjmp is not present in some versions of libc.
   using __interception::GetRealFunctionAddress;
@@ -2434,6 +2563,7 @@
   GetRealFunctionAddress("_setjmp", (uptr*)&REAL(_setjmp), 0, 0);
   GetRealFunctionAddress("sigsetjmp", (uptr*)&REAL(sigsetjmp), 0, 0);
   GetRealFunctionAddress("__sigsetjmp", (uptr*)&REAL(__sigsetjmp), 0, 0);
+#endif
 
   TSAN_INTERCEPT(longjmp);
   TSAN_INTERCEPT(siglongjmp);
@@ -2456,7 +2586,6 @@
   TSAN_INTERCEPT(memset);
   TSAN_INTERCEPT(memcpy);
   TSAN_INTERCEPT(memmove);
-  TSAN_INTERCEPT(memcmp);
   TSAN_INTERCEPT(strchr);
   TSAN_INTERCEPT(strchrnul);
   TSAN_INTERCEPT(strrchr);
@@ -2468,12 +2597,12 @@
   TSAN_INTERCEPT(pthread_join);
   TSAN_INTERCEPT(pthread_detach);
 
-  TSAN_INTERCEPT_VER(pthread_cond_init, "GLIBC_2.3.2");
-  TSAN_INTERCEPT_VER(pthread_cond_signal, "GLIBC_2.3.2");
-  TSAN_INTERCEPT_VER(pthread_cond_broadcast, "GLIBC_2.3.2");
-  TSAN_INTERCEPT_VER(pthread_cond_wait, "GLIBC_2.3.2");
-  TSAN_INTERCEPT_VER(pthread_cond_timedwait, "GLIBC_2.3.2");
-  TSAN_INTERCEPT_VER(pthread_cond_destroy, "GLIBC_2.3.2");
+  TSAN_INTERCEPT_VER(pthread_cond_init, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_signal, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_wait, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_timedwait, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_destroy, PTHREAD_ABI_BASE);
 
   TSAN_INTERCEPT(pthread_mutex_init);
   TSAN_INTERCEPT(pthread_mutex_destroy);
@@ -2502,14 +2631,6 @@
 
   TSAN_INTERCEPT(pthread_once);
 
-  TSAN_INTERCEPT(sem_init);
-  TSAN_INTERCEPT(sem_destroy);
-  TSAN_INTERCEPT(sem_wait);
-  TSAN_INTERCEPT(sem_trywait);
-  TSAN_INTERCEPT(sem_timedwait);
-  TSAN_INTERCEPT(sem_post);
-  TSAN_INTERCEPT(sem_getvalue);
-
   TSAN_INTERCEPT(stat);
   TSAN_MAYBE_INTERCEPT___XSTAT;
   TSAN_MAYBE_INTERCEPT_STAT64;
@@ -2577,24 +2698,68 @@
 
   TSAN_INTERCEPT(fork);
   TSAN_INTERCEPT(vfork);
+#if !SANITIZER_ANDROID
+  TSAN_INTERCEPT(dl_iterate_phdr);
+#endif
   TSAN_INTERCEPT(on_exit);
   TSAN_INTERCEPT(__cxa_atexit);
   TSAN_INTERCEPT(_exit);
 
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
   // Need to set it up, because interceptors check that the function is resolved.
   // But atexit is emitted directly into the module, so it can't be resolved.
   REAL(atexit) = (int(*)(void(*)()))unreachable;
+#endif
+
   if (REAL(__cxa_atexit)(&finalize, 0, 0)) {
     Printf("ThreadSanitizer: failed to setup atexit callback\n");
     Die();
   }
 
+#if !SANITIZER_MAC
   if (pthread_key_create(&g_thread_finalize_key, &thread_finalize)) {
     Printf("ThreadSanitizer: failed to create thread key\n");
     Die();
   }
+#endif
 
   FdInit();
 }
 
 }  // namespace __tsan
+
+// Invisible barrier for tests.
+// There were several unsuccessful iterations for this functionality:
+// 1. Initially it was implemented in user code using
+//    REAL(pthread_barrier_wait). But pthread_barrier_wait is not supported on
+//    MacOS, and futexes are Linux-specific.
+// 2. Then we switched to atomics+usleep(10). But usleep produced parasitic
+//    "as-if synchronized via sleep" messages in reports, which failed some
+//    output tests.
+// 3. Then we switched to atomics+sched_yield. But this produced tons of
+//    tsan-visible events, which led to "failed to restore stack trace" failures.
+// Note that the no_sanitize_thread attribute does not turn off atomic
+// interception, so attaching it to a function defined in user code does not help.
+// That's why we now have what we have.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_testonly_barrier_init(u64 *barrier, u32 count) {
+  if (count >= (1 << 8)) {
+    Printf("barrier_init: count is too large (%d)\n", count);
+    Die();
+  }
+  // The low 8 bits hold the thread count; the remaining bits count entered threads.
+  *barrier = count;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_testonly_barrier_wait(u64 *barrier) {
+  unsigned old = __atomic_fetch_add(barrier, 1 << 8, __ATOMIC_RELAXED);
+  unsigned old_epoch = (old >> 8) / (old & 0xff);
+  for (;;) {
+    unsigned cur = __atomic_load_n(barrier, __ATOMIC_RELAXED);
+    unsigned cur_epoch = (cur >> 8) / (cur & 0xff);
+    if (cur_epoch != old_epoch)
+      return;
+    internal_sched_yield();
+  }
+}
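
A worked, single-threaded check of the barrier arithmetic above (illustration only): the low 8 bits hold the thread count, the high bits count entries, and the epoch `entered / count` advances exactly when the last waiter arrives.

    #include <cassert>

    int main() {
      unsigned long long b = 3;  // as after __tsan_testonly_barrier_init(&b, 3)
      auto epoch = [](unsigned long long v) { return (v >> 8) / (v & 0xff); };
      assert(epoch(b) == 0);
      b += 1 << 8; assert(epoch(b) == 0);  // 1/3: first waiter keeps spinning
      b += 1 << 8; assert(epoch(b) == 0);  // 2/3: second waiter keeps spinning
      b += 1 << 8; assert(epoch(b) == 1);  // 3/3: epoch changes, all released
    }
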
diff --git a/lib/tsan/rtl/tsan_interceptors.h b/lib/tsan/rtl/tsan_interceptors.h
index 49b79a7..d831620 100644
--- a/lib/tsan/rtl/tsan_interceptors.h
+++ b/lib/tsan/rtl/tsan_interceptors.h
@@ -10,6 +10,8 @@
  public:
   ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
   ~ScopedInterceptor();
+  void UserCallbackStart();
+  void UserCallbackEnd();
  private:
   ThreadState *const thr_;
   const uptr pc_;
@@ -26,6 +28,24 @@
     (void)pc; \
 /**/
 
+#define SCOPED_TSAN_INTERCEPTOR(func, ...) \
+    SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__); \
+    if (REAL(func) == 0) { \
+      Report("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
+      Die(); \
+    }                                                    \
+    if (thr->ignore_interceptors || thr->in_ignored_lib) \
+      return REAL(func)(__VA_ARGS__); \
+/**/
+
+#define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \
+    si.UserCallbackStart();
+
+#define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END() \
+    si.UserCallbackEnd();
+
+#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
+
 #if SANITIZER_FREEBSD
 #define __libc_free __free
 #define __libc_malloc __malloc
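
The new USER_CALLBACK_START/END macros bracket invocations of user callbacks from inside interceptors: if the intercepted function lives in an ignored library, checking is temporarily re-enabled so races in the callback itself are still reported (tsan_libdispatch_mac.cc uses this around orig_work). A sketch with a hypothetical interceptor, assuming the surrounding tsan_interceptors.h context (name and signature are illustrative):

    TSAN_INTERCEPTOR(void, lib_for_each, void (*cb)(void *), void *arg) {
      SCOPED_TSAN_INTERCEPTOR(lib_for_each, cb, arg);
      SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
      cb(arg);  // checked as user code even if lib_for_each is in an ignored lib
      SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
    }
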
diff --git a/lib/tsan/rtl/tsan_interceptors_mac.cc b/lib/tsan/rtl/tsan_interceptors_mac.cc
new file mode 100644
index 0000000..2bf7ad9
--- /dev/null
+++ b/lib/tsan/rtl/tsan_interceptors_mac.cc
@@ -0,0 +1,91 @@
+//===-- tsan_interceptors_mac.cc ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific interceptors.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "interception/interception.h"
+#include "tsan_interceptors.h"
+
+#include <libkern/OSAtomic.h>
+
+namespace __tsan {
+
+TSAN_INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(OSSpinLockLock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(OSSpinLockLock, lock);
+  REAL(OSSpinLockLock)(lock);
+  Acquire(thr, pc, (uptr)lock);
+}
+
+TSAN_INTERCEPTOR(bool, OSSpinLockTry, volatile OSSpinLock *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(OSSpinLockTry)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(OSSpinLockTry, lock);
+  bool result = REAL(OSSpinLockTry)(lock);
+  if (result)
+    Acquire(thr, pc, (uptr)lock);
+  return result;
+}
+
+TSAN_INTERCEPTOR(void, OSSpinLockUnlock, volatile OSSpinLock *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(OSSpinLockUnlock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(OSSpinLockUnlock, lock);
+  Release(thr, pc, (uptr)lock);
+  REAL(OSSpinLockUnlock)(lock);
+}
+
+TSAN_INTERCEPTOR(void, os_lock_lock, void *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(os_lock_lock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_lock_lock, lock);
+  REAL(os_lock_lock)(lock);
+  Acquire(thr, pc, (uptr)lock);
+}
+
+TSAN_INTERCEPTOR(bool, os_lock_trylock, void *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(os_lock_trylock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_lock_trylock, lock);
+  bool result = REAL(os_lock_trylock)(lock);
+  if (result)
+    Acquire(thr, pc, (uptr)lock);
+  return result;
+}
+
+TSAN_INTERCEPTOR(void, os_lock_unlock, void *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(os_lock_unlock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_lock_unlock, lock);
+  Release(thr, pc, (uptr)lock);
+  REAL(os_lock_unlock)(lock);
+}
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_MAC
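
Each wrapper above pairs the real lock operation with a TSan Acquire/Release on the lock address, so critical sections on the same OSSpinLock are ordered. A sketch of what that buys (hypothetical user code, not part of the patch):

    #include <libkern/OSAtomic.h>

    static OSSpinLock gLock;
    static int gShared;

    void writer() {
      OSSpinLockLock(&gLock);    // interceptor: REAL lock, then Acquire(&gLock)
      gShared = 1;
      OSSpinLockUnlock(&gLock);  // interceptor: Release(&gLock), then REAL unlock
    }

    void reader() {
      OSSpinLockLock(&gLock);    // this Acquire pairs with the writer's Release,
      int v = gShared;           // so the read is not reported as a race
      OSSpinLockUnlock(&gLock);
      (void)v;
    }
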
diff --git a/lib/tsan/rtl/tsan_interface_ann.cc b/lib/tsan/rtl/tsan_interface_ann.cc
index fd3c846..62db796 100644
--- a/lib/tsan/rtl/tsan_interface_ann.cc
+++ b/lib/tsan/rtl/tsan_interface_ann.cc
@@ -63,8 +63,8 @@
 struct ExpectRace {
   ExpectRace *next;
   ExpectRace *prev;
-  int hitcount;
-  int addcount;
+  atomic_uintptr_t hitcount;
+  atomic_uintptr_t addcount;
   uptr addr;
   uptr size;
   char *file;
@@ -90,7 +90,8 @@
   ExpectRace *race = list->next;
   for (; race != list; race = race->next) {
     if (race->addr == addr && race->size == size) {
-      race->addcount++;
+      atomic_store_relaxed(&race->addcount,
+          atomic_load_relaxed(&race->addcount) + 1);
       return;
     }
   }
@@ -100,8 +101,8 @@
   race->file = f;
   race->line = l;
   race->desc[0] = 0;
-  race->hitcount = 0;
-  race->addcount = 1;
+  atomic_store_relaxed(&race->hitcount, 0);
+  atomic_store_relaxed(&race->addcount, 1);
   if (desc) {
     int i = 0;
     for (; i < kMaxDescLen - 1 && desc[i]; i++)
@@ -130,7 +131,7 @@
     return false;
   DPrintf("Hit expected/benign race: %s addr=%zx:%d %s:%d\n",
       race->desc, race->addr, (int)race->size, race->file, race->line);
-  race->hitcount++;
+  atomic_fetch_add(&race->hitcount, 1, memory_order_relaxed);
   return true;
 }
 
@@ -146,7 +147,7 @@
 }
 
 bool IsExpectedReport(uptr addr, uptr size) {
-  Lock lock(&dyn_ann_ctx->mtx);
+  ReadLock lock(&dyn_ann_ctx->mtx);
   if (CheckContains(&dyn_ann_ctx->expect, addr, size))
     return true;
   if (CheckContains(&dyn_ann_ctx->benign, addr, size))
@@ -155,20 +156,21 @@
 }
 
 static void CollectMatchedBenignRaces(Vector<ExpectRace> *matched,
-    int *unique_count, int *hit_count, int ExpectRace::*counter) {
+    int *unique_count, int *hit_count, atomic_uintptr_t ExpectRace::*counter) {
   ExpectRace *list = &dyn_ann_ctx->benign;
   for (ExpectRace *race = list->next; race != list; race = race->next) {
     (*unique_count)++;
-    if (race->*counter == 0)
+    const uptr cnt = atomic_load_relaxed(&(race->*counter));
+    if (cnt == 0)
       continue;
-    (*hit_count) += race->*counter;
+    *hit_count += cnt;
     uptr i = 0;
     for (; i < matched->Size(); i++) {
       ExpectRace *race0 = &(*matched)[i];
       if (race->line == race0->line
           && internal_strcmp(race->file, race0->file) == 0
           && internal_strcmp(race->desc, race0->desc) == 0) {
-        race0->*counter += race->*counter;
+        atomic_fetch_add(&(race0->*counter), cnt, memory_order_relaxed);
         break;
       }
     }
@@ -193,8 +195,8 @@
         hit_count, (int)internal_getpid());
     for (uptr i = 0; i < hit_matched.Size(); i++) {
       Printf("%d %s:%d %s\n",
-          hit_matched[i].hitcount, hit_matched[i].file,
-          hit_matched[i].line, hit_matched[i].desc);
+          atomic_load_relaxed(&hit_matched[i].hitcount),
+          hit_matched[i].file, hit_matched[i].line, hit_matched[i].desc);
     }
   }
   if (hit_matched.Size()) {
@@ -203,8 +205,8 @@
         add_count, unique_count, (int)internal_getpid());
     for (uptr i = 0; i < add_matched.Size(); i++) {
       Printf("%d %s:%d %s\n",
-          add_matched[i].addcount, add_matched[i].file,
-          add_matched[i].line, add_matched[i].desc);
+          atomic_load_relaxed(&add_matched[i].addcount),
+          add_matched[i].file, add_matched[i].line, add_matched[i].desc);
     }
   }
 }
@@ -303,7 +305,7 @@
   Lock lock(&dyn_ann_ctx->mtx);
   while (dyn_ann_ctx->expect.next != &dyn_ann_ctx->expect) {
     ExpectRace *race = dyn_ann_ctx->expect.next;
-    if (race->hitcount == 0) {
+    if (atomic_load_relaxed(&race->hitcount) == 0) {
       ctx->nmissed_expected++;
       ReportMissedExpectedRace(race);
     }
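
The counter changes above follow from IsExpectedReport now taking `mtx` as a ReadLock: two threads can match the same ExpectRace concurrently, so a plain `hitcount++` would itself be a data race. Relaxed ordering suffices because the counters only feed reporting and order nothing else. The same idiom with std::atomic (a sketch; the runtime uses sanitizer_common's atomic_uintptr_t with equivalent semantics):

    #include <atomic>

    std::atomic<unsigned long> hitcount{0};

    void on_hit() {
      // Relaxed: the count must be consistent, but it does not synchronize
      // any other memory accesses.
      hitcount.fetch_add(1, std::memory_order_relaxed);
    }
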
diff --git a/lib/tsan/rtl/tsan_libdispatch_mac.cc b/lib/tsan/rtl/tsan_libdispatch_mac.cc
new file mode 100644
index 0000000..617dc91
--- /dev/null
+++ b/lib/tsan/rtl/tsan_libdispatch_mac.cc
@@ -0,0 +1,284 @@
+//===-- tsan_libdispatch_mac.cc -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific libdispatch (GCD) support.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "interception/interception.h"
+#include "tsan_interceptors.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+
+#include <Block.h>
+#include <dispatch/dispatch.h>
+#include <pthread.h>
+
+typedef long long_t;  // NOLINT
+
+namespace __tsan {
+
+typedef struct {
+  dispatch_queue_t queue;
+  void *orig_context;
+  dispatch_function_t orig_work;
+  uptr object_to_acquire;
+  dispatch_object_t object_to_release;
+} tsan_block_context_t;
+
+// The offsets of different fields of the dispatch_queue_t structure, exported
+// by libdispatch.dylib.
+extern "C" struct dispatch_queue_offsets_s {
+  const uint16_t dqo_version;
+  const uint16_t dqo_label;
+  const uint16_t dqo_label_size;
+  const uint16_t dqo_flags;
+  const uint16_t dqo_flags_size;
+  const uint16_t dqo_serialnum;
+  const uint16_t dqo_serialnum_size;
+  const uint16_t dqo_width;
+  const uint16_t dqo_width_size;
+  const uint16_t dqo_running;
+  const uint16_t dqo_running_size;
+  const uint16_t dqo_suspend_cnt;
+  const uint16_t dqo_suspend_cnt_size;
+  const uint16_t dqo_target_queue;
+  const uint16_t dqo_target_queue_size;
+  const uint16_t dqo_priority;
+  const uint16_t dqo_priority_size;
+} dispatch_queue_offsets;
+
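+// A queue's width is its maximum concurrency; libdispatch uses a width of 1
+// for serial queues, which is what IsQueueSerial() checks below.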
+static bool IsQueueSerial(dispatch_queue_t q) {
+  CHECK_EQ(dispatch_queue_offsets.dqo_width_size, 2);
+  uptr width = *(uint16_t *)(((uptr)q) + dispatch_queue_offsets.dqo_width);
+  CHECK_NE(width, 0);
+  return width == 1;
+}
+
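+// Allocates the context that carries the original work item through the
+// queue; dispatch_callback_wrap_acquire() runs the item and then frees the
+// context with user_free().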
+static tsan_block_context_t *AllocContext(ThreadState *thr, uptr pc,
+                                          dispatch_queue_t queue,
+                                          void *orig_context,
+                                          dispatch_function_t orig_work) {
+  tsan_block_context_t *new_context =
+      (tsan_block_context_t *)user_alloc(thr, pc, sizeof(tsan_block_context_t));
+  new_context->queue = queue;
+  new_context->orig_context = orig_context;
+  new_context->orig_work = orig_work;
+  new_context->object_to_acquire = (uptr)new_context;
+  new_context->object_to_release = nullptr;
+  return new_context;
+}
+
+static void dispatch_callback_wrap_acquire(void *param) {
+  SCOPED_INTERCEPTOR_RAW(dispatch_async_f_callback_wrap);
+  tsan_block_context_t *context = (tsan_block_context_t *)param;
+  Acquire(thr, pc, context->object_to_acquire);
+
+  // Extra retain/release is required for dispatch groups. We use the group
+  // itself to synchronize, but in a notification (dispatch_group_notify
+  // callback), it may be disposed already. To solve this, we retain the group
+  // and release it here.
+  if (context->object_to_release) dispatch_release(context->object_to_release);
+
+  // In serial queues, work items can be executed on different threads, so we
+  // need to explicitly synchronize on the queue itself.
+  if (IsQueueSerial(context->queue)) Acquire(thr, pc, (uptr)context->queue);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  context->orig_work(context->orig_context);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  if (IsQueueSerial(context->queue)) Release(thr, pc, (uptr)context->queue);
+  user_free(thr, pc, context);
+}
+
+static void invoke_and_release_block(void *param) {
+  dispatch_block_t block = (dispatch_block_t)param;
+  block();
+  Block_release(block);
+}
+
+#define DISPATCH_INTERCEPT_B(name)                                           \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, dispatch_block_t block) { \
+    SCOPED_TSAN_INTERCEPTOR(name, q, block);                                 \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                           \
+    dispatch_block_t heap_block = Block_copy(block);                         \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                             \
+    tsan_block_context_t *new_context =                                      \
+        AllocContext(thr, pc, q, heap_block, &invoke_and_release_block);     \
+    Release(thr, pc, (uptr)new_context);                                     \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                           \
+    REAL(name##_f)(q, new_context, dispatch_callback_wrap_acquire);          \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                             \
+  }
+
+#define DISPATCH_INTERCEPT_F(name)                                \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, void *context, \
+                   dispatch_function_t work) {                    \
+    SCOPED_TSAN_INTERCEPTOR(name, q, context, work);              \
+    tsan_block_context_t *new_context =                           \
+        AllocContext(thr, pc, q, context, work);                  \
+    Release(thr, pc, (uptr)new_context);                          \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                \
+    REAL(name)(q, new_context, dispatch_callback_wrap_acquire);   \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                  \
+  }
+
+// We wrap dispatch_async, dispatch_sync and friends where we allocate a new
+// context, which is used to synchronize (we release the context before
+// submitting, and the callback acquires it before executing the original
+// callback).
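+//
+// For example (a sketch; `data` and `use()` are hypothetical):
+//   data = 42;
+//   dispatch_async(q, ^{ use(data); });
+// The write to data happens before Release(context) on the submitting
+// thread, which synchronizes with Acquire(context) in the callback wrapper
+// before use(data) runs, so TSan sees the transfer as race-free.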
+DISPATCH_INTERCEPT_B(dispatch_async)
+DISPATCH_INTERCEPT_B(dispatch_barrier_async)
+DISPATCH_INTERCEPT_F(dispatch_async_f)
+DISPATCH_INTERCEPT_F(dispatch_barrier_async_f)
+DISPATCH_INTERCEPT_B(dispatch_sync)
+DISPATCH_INTERCEPT_B(dispatch_barrier_sync)
+DISPATCH_INTERCEPT_F(dispatch_sync_f)
+DISPATCH_INTERCEPT_F(dispatch_barrier_sync_f)
+
+// GCD's dispatch_once implementation has a fast path that contains a racy read
+// and is inlined into the user's code. Furthermore, this fast path doesn't
+// establish proper happens-before relations between the initialization and the
+// code following the call to dispatch_once. We could deal with this in
+// instrumented code, but there's not much we can do about it in system
+// libraries. Let's disable the fast path (by never storing the value ~0 to the
+// predicate), so the interceptor is always called, and let's add proper release
+// and acquire semantics. Since TSan does not see its own atomic stores, the
+// race on the predicate won't be reported - the only accesses to it that TSan
+// sees are the loads on the fast path. Loads don't race. Secondly, dispatch_once
+// is both a macro and a real function; we want to intercept the function, so we
+// need to undefine the macro.
+#undef dispatch_once
+TSAN_INTERCEPTOR(void, dispatch_once, dispatch_once_t *predicate,
+                 dispatch_block_t block) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_once, predicate, block);
+  atomic_uint32_t *a = reinterpret_cast<atomic_uint32_t *>(predicate);
+  u32 v = atomic_load(a, memory_order_acquire);
+  if (v == 0 &&
+      atomic_compare_exchange_strong(a, &v, 1, memory_order_relaxed)) {
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+    block();
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+    Release(thr, pc, (uptr)a);
+    atomic_store(a, 2, memory_order_release);
+  } else {
+    while (v != 2) {
+      internal_sched_yield();
+      v = atomic_load(a, memory_order_acquire);
+    }
+    Acquire(thr, pc, (uptr)a);
+  }
+}
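+// Note: the predicate thus transitions 0 -> 1 (initialization in progress)
+// -> 2 (done), never reaching the ~0 value that GCD's inlined fast path
+// tests for, so every call keeps coming into this interceptor.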
+
+#undef dispatch_once_f
+TSAN_INTERCEPTOR(void, dispatch_once_f, dispatch_once_t *predicate,
+                 void *context, dispatch_function_t function) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_once_f, predicate, context, function);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  WRAP(dispatch_once)(predicate, ^(void) {
+    function(context);
+  });
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+}
+
+TSAN_INTERCEPTOR(long_t, dispatch_semaphore_signal,
+                 dispatch_semaphore_t dsema) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_semaphore_signal, dsema);
+  Release(thr, pc, (uptr)dsema);
+  return REAL(dispatch_semaphore_signal)(dsema);
+}
+
+TSAN_INTERCEPTOR(long_t, dispatch_semaphore_wait, dispatch_semaphore_t dsema,
+                 dispatch_time_t timeout) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_semaphore_wait, dsema, timeout);
+  long_t result = REAL(dispatch_semaphore_wait)(dsema, timeout);
+  if (result == 0) Acquire(thr, pc, (uptr)dsema);
+  return result;
+}
+
+TSAN_INTERCEPTOR(long_t, dispatch_group_wait, dispatch_group_t group,
+                 dispatch_time_t timeout) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_wait, group, timeout);
+  long_t result = REAL(dispatch_group_wait)(group, timeout);
+  if (result == 0) Acquire(thr, pc, (uptr)group);
+  return result;
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_leave, dispatch_group_t group) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_leave, group);
+  Release(thr, pc, (uptr)group);
+  REAL(dispatch_group_leave)(group);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_async, dispatch_group_t group,
+                 dispatch_queue_t queue, dispatch_block_t block) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_async, group, queue, block);
+  dispatch_retain(group);
+  dispatch_group_enter(group);
+  WRAP(dispatch_async)(queue, ^(void) {
+    block();
+    WRAP(dispatch_group_leave)(group);
+    dispatch_release(group);
+  });
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_async_f, dispatch_group_t group,
+                 dispatch_queue_t queue, void *context,
+                 dispatch_function_t work) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_async_f, group, queue, context, work);
+  dispatch_retain(group);
+  dispatch_group_enter(group);
+  WRAP(dispatch_async)(queue, ^(void) {
+    work(context);
+    WRAP(dispatch_group_leave)(group);
+    dispatch_release(group);
+  });
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_notify, dispatch_group_t group,
+                 dispatch_queue_t q, dispatch_block_t block) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_notify, group, q, block);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  dispatch_block_t heap_block = Block_copy(block);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  tsan_block_context_t *new_context =
+      AllocContext(thr, pc, q, heap_block, &invoke_and_release_block);
+  new_context->object_to_acquire = (uptr)group;
+
+  // Will be released in dispatch_callback_wrap_acquire.
+  new_context->object_to_release = group;
+  dispatch_retain(group);
+
+  Release(thr, pc, (uptr)group);
+  REAL(dispatch_group_notify_f)(group, q, new_context,
+                                dispatch_callback_wrap_acquire);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_notify_f, dispatch_group_t group,
+                 dispatch_queue_t q, void *context, dispatch_function_t work) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_notify_f, group, q, context, work);
+  tsan_block_context_t *new_context = AllocContext(thr, pc, q, context, work);
+  new_context->object_to_acquire = (uptr)group;
+
+  // Will be released in dispatch_callback_wrap_acquire.
+  new_context->object_to_release = group;
+  dispatch_retain(group);
+
+  Release(thr, pc, (uptr)group);
+  REAL(dispatch_group_notify_f)(group, q, new_context,
+                                dispatch_callback_wrap_acquire);
+}
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_MAC
diff --git a/lib/tsan/rtl/tsan_malloc_mac.cc b/lib/tsan/rtl/tsan_malloc_mac.cc
new file mode 100644
index 0000000..7fd9427
--- /dev/null
+++ b/lib/tsan/rtl/tsan_malloc_mac.cc
@@ -0,0 +1,65 @@
+//===-- tsan_malloc_mac.cc ------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific malloc interception.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "tsan_interceptors.h"
+#include "tsan_stack_trace.h"
+
+using namespace __tsan;
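+
+// The macros below parameterize the shared Mac malloc zone implementation in
+// sanitizer_malloc_mac.inc; each one expands into the body of the
+// corresponding malloc zone function. Calls made while in_symbolizer is set
+// bypass TSan's allocator and go to the real libc implementation.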
+#define COMMON_MALLOC_ZONE_NAME "tsan"
+#define COMMON_MALLOC_ENTER()
+#define COMMON_MALLOC_SANITIZER_INITIALIZED (cur_thread()->is_inited)
+#define COMMON_MALLOC_FORCE_LOCK()
+#define COMMON_MALLOC_FORCE_UNLOCK()
+#define COMMON_MALLOC_MEMALIGN(alignment, size) \
+  void *p =                                     \
+      user_alloc(cur_thread(), StackTrace::GetCurrentPc(), size, alignment)
+#define COMMON_MALLOC_MALLOC(size)      \
+  if (cur_thread()->in_symbolizer)      \
+    return REAL(malloc)(size);          \
+  SCOPED_INTERCEPTOR_RAW(malloc, size); \
+  void *p = user_alloc(thr, pc, size)
+#define COMMON_MALLOC_REALLOC(ptr, size)      \
+  if (cur_thread()->in_symbolizer)            \
+    return REAL(realloc)(ptr, size);          \
+  SCOPED_INTERCEPTOR_RAW(realloc, ptr, size); \
+  void *p = user_realloc(thr, pc, ptr, size)
+#define COMMON_MALLOC_CALLOC(count, size)      \
+  if (cur_thread()->in_symbolizer)             \
+    return REAL(calloc)(count, size);          \
+  SCOPED_INTERCEPTOR_RAW(calloc, size, count); \
+  void *p = user_calloc(thr, pc, size, count)
+#define COMMON_MALLOC_VALLOC(size)                          \
+  if (cur_thread()->in_symbolizer)                          \
+    return REAL(valloc)(size);                              \
+  SCOPED_INTERCEPTOR_RAW(valloc, size);                     \
+  void *p = user_alloc(thr, pc, size, GetPageSizeCached())
+#define COMMON_MALLOC_FREE(ptr)      \
+  if (cur_thread()->in_symbolizer)   \
+    return REAL(free)(ptr);          \
+  SCOPED_INTERCEPTOR_RAW(free, ptr); \
+  user_free(thr, pc, ptr)
+#define COMMON_MALLOC_SIZE(ptr) \
+  uptr size = user_alloc_usable_size(ptr);
+#define COMMON_MALLOC_FILL_STATS(zone, stats)
+#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) \
+  (void)zone_name; \
+  Report("mz_realloc(%p) -- attempting to realloc unallocated memory.\n", ptr);
+#define COMMON_MALLOC_NAMESPACE __tsan
+
+#include "sanitizer_common/sanitizer_malloc_mac.inc"
+
+#endif
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index ebb3f77..7247c6e 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -19,12 +19,14 @@
 #include "tsan_flags.h"
 
 // May be overridden by front-end.
-extern "C" void WEAK __sanitizer_malloc_hook(void *ptr, uptr size) {
+SANITIZER_WEAK_DEFAULT_IMPL
+void __sanitizer_malloc_hook(void *ptr, uptr size) {
   (void)ptr;
   (void)size;
 }
 
-extern "C" void WEAK __sanitizer_free_hook(void *ptr) {
+SANITIZER_WEAK_DEFAULT_IMPL
+void __sanitizer_free_hook(void *ptr) {
   (void)ptr;
 }
 
@@ -36,6 +38,23 @@
     // We are about to unmap a chunk of user memory.
     // Mark the corresponding shadow memory as not needed.
     DontNeedShadowFor(p, size);
+    // Mark the corresponding meta shadow memory as not needed.
+    // Note the block does not contain any meta info at this point
+    // (this happens after free).
+    const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
+    const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
+    // Block came from LargeMmapAllocator, so must be large.
+    // We rely on this in the calculations below.
+    CHECK_GE(size, 2 * kPageSize);
+    uptr diff = RoundUp(p, kPageSize) - p;
+    if (diff != 0) {
+      p += diff;
+      size -= diff;
+    }
+    diff = p + size - RoundDown(p + size, kPageSize);
+    if (diff != 0)
+      size -= diff;
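+    // [p, p + size) is now page-aligned on both ends; release the whole
+    // pages of meta shadow that correspond to it.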
+    FlushUnneededShadowMemory((uptr)MemToMeta(p), size / kMetaRatio);
   }
 };
 
@@ -63,17 +82,17 @@
 }
 
 static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
-  if (atomic_load(&thr->in_signal_handler, memory_order_relaxed) == 0 ||
+  if (atomic_load_relaxed(&thr->in_signal_handler) == 0 ||
       !flags()->report_signal_unsafe)
     return;
   VarSizeStackTrace stack;
   ObtainCurrentStack(thr, pc, &stack);
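+  // Check fired suppressions before taking the thread registry lock;
+  // the lock is not needed just to match the suppression.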
+  if (IsFiredSuppression(ctx, ReportTypeSignalUnsafe, stack))
+    return;
   ThreadRegistryLock l(ctx->thread_registry);
   ScopedReport rep(ReportTypeSignalUnsafe);
-  if (!IsFiredSuppression(ctx, rep, stack)) {
-    rep.AddStack(stack, true);
-    OutputReport(thr, rep);
-  }
+  rep.AddStack(stack, true);
+  OutputReport(thr, rep);
 }
 
 void *user_alloc(ThreadState *thr, uptr pc, uptr sz, uptr align, bool signal) {
diff --git a/lib/tsan/rtl/tsan_mman.h b/lib/tsan/rtl/tsan_mman.h
index 5ff956d..b419b58 100644
--- a/lib/tsan/rtl/tsan_mman.h
+++ b/lib/tsan/rtl/tsan_mman.h
@@ -20,6 +20,7 @@
 const uptr kDefaultAlignment = 16;
 
 void InitializeAllocator();
+void ReplaceSystemMalloc();
 void AllocatorThreadStart(ThreadState *thr);
 void AllocatorThreadFinish(ThreadState *thr);
 void AllocatorPrintStats();
diff --git a/lib/tsan/rtl/tsan_mutex.cc b/lib/tsan/rtl/tsan_mutex.cc
index dc5a462..9dd2480 100644
--- a/lib/tsan/rtl/tsan_mutex.cc
+++ b/lib/tsan/rtl/tsan_mutex.cc
@@ -41,6 +41,8 @@
   /*9  MutexTypeMBlock*/      {MutexTypeSyncVar},
   /*10 MutexTypeJavaMBlock*/  {MutexTypeSyncVar},
   /*11 MutexTypeDDetector*/   {},
+  /*12 MutexTypeFired*/       {MutexTypeLeaf},
+  /*13 MutexTypeRacy*/        {MutexTypeLeaf},
 };
 
 static bool CanLockAdj[MutexTypeCount][MutexTypeCount];
diff --git a/lib/tsan/rtl/tsan_mutex.h b/lib/tsan/rtl/tsan_mutex.h
index 88fad57..27f5538 100644
--- a/lib/tsan/rtl/tsan_mutex.h
+++ b/lib/tsan/rtl/tsan_mutex.h
@@ -32,6 +32,8 @@
   MutexTypeMBlock,
   MutexTypeJavaMBlock,
   MutexTypeDDetector,
+  MutexTypeFired,
+  MutexTypeRacy,
 
   // This must be the last.
   MutexTypeCount
diff --git a/lib/tsan/rtl/tsan_new_delete.cc b/lib/tsan/rtl/tsan_new_delete.cc
index 2d9d044..ebb422c 100644
--- a/lib/tsan/rtl/tsan_new_delete.cc
+++ b/lib/tsan/rtl/tsan_new_delete.cc
@@ -11,6 +11,7 @@
 //
 // Interceptors for operators new and delete.
 //===----------------------------------------------------------------------===//
+#include "interception/interception.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "tsan_interceptors.h"
 
@@ -20,6 +21,13 @@
 struct nothrow_t {};
 }  // namespace std
 
+DECLARE_REAL(void *, malloc, uptr size)
+DECLARE_REAL(void, free, void *ptr)
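+// Mac and Android provide no __libc_malloc/__libc_free, so route the
+// in-symbolizer allocations in OPERATOR_NEW_BODY/OPERATOR_DELETE_BODY to the
+// real (uninstrumented) malloc/free obtained through interception.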
+#if SANITIZER_MAC || SANITIZER_ANDROID
+#define __libc_malloc REAL(malloc)
+#define __libc_free REAL(free)
+#endif
+
 #define OPERATOR_NEW_BODY(mangled_name) \
   if (cur_thread()->in_symbolizer) \
     return __libc_malloc(size); \
@@ -64,14 +72,14 @@
   user_free(thr, pc, ptr);
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void operator delete(void *ptr) throw();
-void operator delete(void *ptr) throw() {
+void operator delete(void *ptr) NOEXCEPT;
+void operator delete(void *ptr) NOEXCEPT {
   OPERATOR_DELETE_BODY(_ZdlPv);
 }
 
 SANITIZER_INTERFACE_ATTRIBUTE
-void operator delete[](void *ptr) throw();
-void operator delete[](void *ptr) throw() {
+void operator delete[](void *ptr) NOEXCEPT;
+void operator delete[](void *ptr) NOEXCEPT {
   OPERATOR_DELETE_BODY(_ZdaPv);
 }
 
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index 135e160..c2b4871 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -41,21 +41,23 @@
 7e00 0000 0000 - 7e80 0000 0000: -
 7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
 */
-const uptr kMetaShadowBeg = 0x300000000000ull;
-const uptr kMetaShadowEnd = 0x400000000000ull;
-const uptr kTraceMemBeg   = 0x600000000000ull;
-const uptr kTraceMemEnd   = 0x620000000000ull;
-const uptr kShadowBeg     = 0x020000000000ull;
-const uptr kShadowEnd     = 0x100000000000ull;
-const uptr kHeapMemBeg    = 0x7d0000000000ull;
-const uptr kHeapMemEnd    = 0x7e0000000000ull;
-const uptr kLoAppMemBeg   = 0x000000001000ull;
-const uptr kLoAppMemEnd   = 0x010000000000ull;
-const uptr kHiAppMemBeg   = 0x7e8000000000ull;
-const uptr kHiAppMemEnd   = 0x800000000000ull;
-const uptr kAppMemMsk     = 0x7c0000000000ull;
-const uptr kAppMemXor     = 0x020000000000ull;
-const uptr kVdsoBeg       = 0xf000000000000000ull;
+struct Mapping {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kTraceMemBeg   = 0x600000000000ull;
+  static const uptr kTraceMemEnd   = 0x620000000000ull;
+  static const uptr kShadowBeg     = 0x020000000000ull;
+  static const uptr kShadowEnd     = 0x100000000000ull;
+  static const uptr kHeapMemBeg    = 0x7d0000000000ull;
+  static const uptr kHeapMemEnd    = 0x7e0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x010000000000ull;
+  static const uptr kHiAppMemBeg   = 0x7e8000000000ull;
+  static const uptr kHiAppMemEnd   = 0x800000000000ull;
+  static const uptr kAppMemMsk     = 0x7c0000000000ull;
+  static const uptr kAppMemXor     = 0x020000000000ull;
+  static const uptr kVdsoBeg       = 0xf000000000000000ull;
+};
 #elif defined(__mips64)
 /*
 C/C++ on linux/mips64
@@ -71,68 +73,180 @@
 ff00 0000 00 - ff80 0000 00: -
 ff80 0000 00 - ffff ffff ff: modules and main thread stack
 */
-const uptr kMetaShadowBeg = 0x3000000000ull;
-const uptr kMetaShadowEnd = 0x4000000000ull;
-const uptr kTraceMemBeg   = 0x6000000000ull;
-const uptr kTraceMemEnd   = 0x6200000000ull;
-const uptr kShadowBeg     = 0x1400000000ull;
-const uptr kShadowEnd     = 0x2400000000ull;
-const uptr kHeapMemBeg    = 0xfe00000000ull;
-const uptr kHeapMemEnd    = 0xff00000000ull;
-const uptr kLoAppMemBeg   = 0x0100000000ull;
-const uptr kLoAppMemEnd   = 0x0200000000ull;
-const uptr kHiAppMemBeg   = 0xff80000000ull;
-const uptr kHiAppMemEnd   = 0xffffffffffull;
-const uptr kAppMemMsk     = 0xfc00000000ull;
-const uptr kAppMemXor     = 0x0400000000ull;
-const uptr kVdsoBeg       = 0xfffff00000ull;
-#endif
-
-ALWAYS_INLINE
-bool IsAppMem(uptr mem) {
-  return (mem >= kHeapMemBeg && mem < kHeapMemEnd) ||
-         (mem >= kLoAppMemBeg && mem < kLoAppMemEnd) ||
-         (mem >= kHiAppMemBeg && mem < kHiAppMemEnd);
-}
-
-ALWAYS_INLINE
-bool IsShadowMem(uptr mem) {
-  return mem >= kShadowBeg && mem <= kShadowEnd;
-}
-
-ALWAYS_INLINE
-bool IsMetaMem(uptr mem) {
-  return mem >= kMetaShadowBeg && mem <= kMetaShadowEnd;
-}
-
-ALWAYS_INLINE
-uptr MemToShadow(uptr x) {
-  DCHECK(IsAppMem(x));
-  return (((x) & ~(kAppMemMsk | (kShadowCell - 1)))
-      ^ kAppMemXor) * kShadowCnt;
-}
-
-ALWAYS_INLINE
-u32 *MemToMeta(uptr x) {
-  DCHECK(IsAppMem(x));
-  return (u32*)(((((x) & ~(kAppMemMsk | (kMetaShadowCell - 1)))
-      ^ kAppMemXor) / kMetaShadowCell * kMetaShadowSize) | kMetaShadowBeg);
-}
-
-ALWAYS_INLINE
-uptr ShadowToMem(uptr s) {
-  CHECK(IsShadowMem(s));
-  if (s >= MemToShadow(kLoAppMemBeg) && s <= MemToShadow(kLoAppMemEnd - 1))
-    return (s / kShadowCnt) ^ kAppMemXor;
-  else
-    return ((s / kShadowCnt) ^ kAppMemXor) | kAppMemMsk;
-}
-
-static USED uptr UserRegions[] = {
-  kLoAppMemBeg, kLoAppMemEnd,
-  kHiAppMemBeg, kHiAppMemEnd,
-  kHeapMemBeg,  kHeapMemEnd,
+struct Mapping {
+  static const uptr kMetaShadowBeg = 0x3000000000ull;
+  static const uptr kMetaShadowEnd = 0x4000000000ull;
+  static const uptr kTraceMemBeg   = 0x6000000000ull;
+  static const uptr kTraceMemEnd   = 0x6200000000ull;
+  static const uptr kShadowBeg     = 0x1400000000ull;
+  static const uptr kShadowEnd     = 0x2400000000ull;
+  static const uptr kHeapMemBeg    = 0xfe00000000ull;
+  static const uptr kHeapMemEnd    = 0xff00000000ull;
+  static const uptr kLoAppMemBeg   = 0x0100000000ull;
+  static const uptr kLoAppMemEnd   = 0x0200000000ull;
+  static const uptr kHiAppMemBeg   = 0xff80000000ull;
+  static const uptr kHiAppMemEnd   = 0xffffffffffull;
+  static const uptr kAppMemMsk     = 0xfc00000000ull;
+  static const uptr kAppMemXor     = 0x0400000000ull;
+  static const uptr kVdsoBeg       = 0xfffff00000ull;
 };
+#elif defined(__aarch64__)
+// AArch64 supports multiple VMA sizes, which leads to multiple address
+// transformation functions.  To support these multiple VMA transformations
+// and mappings, the TSAN runtime for AArch64 uses an external memory read
+// (vmaSize) to select which mapping to use.  Although slower, this lets the
+// same instrumented binary run on multiple kernels.
+
+/*
+C/C++ on linux/aarch64 (39-bit VMA)
+0000 0010 00 - 0100 0000 00: main binary
+0100 0000 00 - 0800 0000 00: -
+0800 0000 00 - 2000 0000 00: shadow memory
+2000 0000 00 - 3100 0000 00: -
+3100 0000 00 - 3400 0000 00: metainfo
+3400 0000 00 - 5500 0000 00: -
+5500 0000 00 - 5600 0000 00: main binary (PIE)
+5600 0000 00 - 6000 0000 00: -
+6000 0000 00 - 6200 0000 00: traces
+6200 0000 00 - 7c00 0000 00: -
+7c00 0000 00 - 7d00 0000 00: heap
+7d00 0000 00 - 7fff ffff ff: modules and main thread stack
+*/
+struct Mapping39 {
+  static const uptr kLoAppMemBeg   = 0x0000001000ull;
+  static const uptr kLoAppMemEnd   = 0x0100000000ull;
+  static const uptr kShadowBeg     = 0x0800000000ull;
+  static const uptr kShadowEnd     = 0x2000000000ull;
+  static const uptr kMetaShadowBeg = 0x3100000000ull;
+  static const uptr kMetaShadowEnd = 0x3400000000ull;
+  static const uptr kMidAppMemBeg  = 0x5500000000ull;
+  static const uptr kMidAppMemEnd  = 0x5600000000ull;
+  static const uptr kMidShadowOff  = 0x5000000000ull;
+  static const uptr kTraceMemBeg   = 0x6000000000ull;
+  static const uptr kTraceMemEnd   = 0x6200000000ull;
+  static const uptr kHeapMemBeg    = 0x7c00000000ull;
+  static const uptr kHeapMemEnd    = 0x7d00000000ull;
+  static const uptr kHiAppMemBeg   = 0x7e00000000ull;
+  static const uptr kHiAppMemEnd   = 0x7fffffffffull;
+  static const uptr kAppMemMsk     = 0x7800000000ull;
+  static const uptr kAppMemXor     = 0x0200000000ull;
+  static const uptr kVdsoBeg       = 0x7f00000000ull;
+};
+
+/*
+C/C++ on linux/aarch64 (42-bit VMA)
+00000 0010 00 - 01000 0000 00: main binary
+01000 0000 00 - 10000 0000 00: -
+10000 0000 00 - 20000 0000 00: shadow memory
+20000 0000 00 - 26000 0000 00: -
+26000 0000 00 - 28000 0000 00: metainfo
+28000 0000 00 - 2aa00 0000 00: -
+2aa00 0000 00 - 2ab00 0000 00: main binary (PIE)
+2ab00 0000 00 - 36200 0000 00: -
+36200 0000 00 - 36240 0000 00: traces
+36240 0000 00 - 3e000 0000 00: -
+3e000 0000 00 - 3f000 0000 00: heap
+3f000 0000 00 - 3ffff ffff ff: modules and main thread stack
+*/
+struct Mapping42 {
+  static const uptr kLoAppMemBeg   = 0x00000001000ull;
+  static const uptr kLoAppMemEnd   = 0x01000000000ull;
+  static const uptr kShadowBeg     = 0x10000000000ull;
+  static const uptr kShadowEnd     = 0x20000000000ull;
+  static const uptr kMetaShadowBeg = 0x26000000000ull;
+  static const uptr kMetaShadowEnd = 0x28000000000ull;
+  static const uptr kMidAppMemBeg  = 0x2aa00000000ull;
+  static const uptr kMidAppMemEnd  = 0x2ab00000000ull;
+  static const uptr kMidShadowOff  = 0x28000000000ull;
+  static const uptr kTraceMemBeg   = 0x36200000000ull;
+  static const uptr kTraceMemEnd   = 0x36400000000ull;
+  static const uptr kHeapMemBeg    = 0x3e000000000ull;
+  static const uptr kHeapMemEnd    = 0x3f000000000ull;
+  static const uptr kHiAppMemBeg   = 0x3f000000000ull;
+  static const uptr kHiAppMemEnd   = 0x3ffffffffffull;
+  static const uptr kAppMemMsk     = 0x3c000000000ull;
+  static const uptr kAppMemXor     = 0x04000000000ull;
+  static const uptr kVdsoBeg       = 0x37f00000000ull;
+};
+
+// Indicates the runtime will define the memory regions at runtime.
+#define TSAN_RUNTIME_VMA 1
+// Indicates that mapping defines a mid range memory segment.
+#define TSAN_MID_APP_RANGE 1
+#elif defined(__powerpc64__)
+// PPC64 supports multiple VMA sizes, which leads to multiple address
+// transformation functions.  To support these multiple VMA transformations
+// and mappings, the TSAN runtime for PPC64 uses an external memory read
+// (vmaSize) to select which mapping to use.  Although slower, this lets the
+// same instrumented binary run on multiple kernels.
+
+/*
+C/C++ on linux/powerpc64 (44-bit VMA)
+0000 0000 0100 - 0001 0000 0000: main binary
+0001 0000 0000 - 0001 0000 0000: -
+0001 0000 0000 - 0b00 0000 0000: shadow
+0b00 0000 0000 - 0b00 0000 0000: -
+0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects)
+0d00 0000 0000 - 0d00 0000 0000: -
+0d00 0000 0000 - 0f00 0000 0000: traces
+0f00 0000 0000 - 0f00 0000 0000: -
+0f00 0000 0000 - 0f50 0000 0000: heap
+0f50 0000 0000 - 0f60 0000 0000: -
+0f60 0000 0000 - 1000 0000 0000: modules and main thread stack
+*/
+struct Mapping44 {
+  static const uptr kMetaShadowBeg = 0x0b0000000000ull;
+  static const uptr kMetaShadowEnd = 0x0d0000000000ull;
+  static const uptr kTraceMemBeg   = 0x0d0000000000ull;
+  static const uptr kTraceMemEnd   = 0x0f0000000000ull;
+  static const uptr kShadowBeg     = 0x000100000000ull;
+  static const uptr kShadowEnd     = 0x0b0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000000100ull;
+  static const uptr kLoAppMemEnd   = 0x000100000000ull;
+  static const uptr kHeapMemBeg    = 0x0f0000000000ull;
+  static const uptr kHeapMemEnd    = 0x0f5000000000ull;
+  static const uptr kHiAppMemBeg   = 0x0f6000000000ull;
+  static const uptr kHiAppMemEnd   = 0x100000000000ull; // 44 bits
+  static const uptr kAppMemMsk     = 0x0f0000000000ull;
+  static const uptr kAppMemXor     = 0x002100000000ull;
+  static const uptr kVdsoBeg       = 0x3c0000000000000ull;
+};
+
+/*
+C/C++ on linux/powerpc64 (46-bit VMA)
+0000 0000 1000 - 0100 0000 0000: main binary
+0100 0000 0000 - 0100 0000 0000: -
+0100 0000 0000 - 1000 0000 0000: shadow
+1000 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
+2000 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2200 0000 0000: traces
+2200 0000 0000 - 3d00 0000 0000: -
+3d00 0000 0000 - 3e00 0000 0000: heap
+3e00 0000 0000 - 3e80 0000 0000: -
+3e80 0000 0000 - 4000 0000 0000: modules and main thread stack
+*/
+struct Mapping46 {
+  static const uptr kMetaShadowBeg = 0x100000000000ull;
+  static const uptr kMetaShadowEnd = 0x200000000000ull;
+  static const uptr kTraceMemBeg   = 0x200000000000ull;
+  static const uptr kTraceMemEnd   = 0x220000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x100000000000ull;
+  static const uptr kHeapMemBeg    = 0x3d0000000000ull;
+  static const uptr kHeapMemEnd    = 0x3e0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x010000000000ull;
+  static const uptr kHiAppMemBeg   = 0x3e8000000000ull;
+  static const uptr kHiAppMemEnd   = 0x400000000000ull; // 46 bits
+  static const uptr kAppMemMsk     = 0x3c0000000000ull;
+  static const uptr kAppMemXor     = 0x020000000000ull;
+  static const uptr kVdsoBeg       = 0x7800000000000000ull;
+};
+
+// Indicates the runtime will define the memory regions at runtime.
+#define TSAN_RUNTIME_VMA 1
+#endif
 
 #elif defined(SANITIZER_GO) && !SANITIZER_WINDOWS
 
@@ -149,51 +263,15 @@
 6200 0000 0000 - 8000 0000 0000: -
 */
 
-const uptr kMetaShadowBeg = 0x300000000000ull;
-const uptr kMetaShadowEnd = 0x400000000000ull;
-const uptr kTraceMemBeg   = 0x600000000000ull;
-const uptr kTraceMemEnd   = 0x620000000000ull;
-const uptr kShadowBeg     = 0x200000000000ull;
-const uptr kShadowEnd     = 0x238000000000ull;
-const uptr kAppMemBeg     = 0x000000001000ull;
-const uptr kAppMemEnd     = 0x00e000000000ull;
-
-ALWAYS_INLINE
-bool IsAppMem(uptr mem) {
-  return mem >= kAppMemBeg && mem < kAppMemEnd;
-}
-
-ALWAYS_INLINE
-bool IsShadowMem(uptr mem) {
-  return mem >= kShadowBeg && mem <= kShadowEnd;
-}
-
-ALWAYS_INLINE
-bool IsMetaMem(uptr mem) {
-  return mem >= kMetaShadowBeg && mem <= kMetaShadowEnd;
-}
-
-ALWAYS_INLINE
-uptr MemToShadow(uptr x) {
-  DCHECK(IsAppMem(x));
-  return ((x & ~(kShadowCell - 1)) * kShadowCnt) | kShadowBeg;
-}
-
-ALWAYS_INLINE
-u32 *MemToMeta(uptr x) {
-  DCHECK(IsAppMem(x));
-  return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
-      kMetaShadowCell * kMetaShadowSize) | kMetaShadowBeg);
-}
-
-ALWAYS_INLINE
-uptr ShadowToMem(uptr s) {
-  CHECK(IsShadowMem(s));
-  return (s & ~kShadowBeg) / kShadowCnt;
-}
-
-static USED uptr UserRegions[] = {
-  kAppMemBeg, kAppMemEnd,
+struct Mapping {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kTraceMemBeg   = 0x600000000000ull;
+  static const uptr kTraceMemEnd   = 0x620000000000ull;
+  static const uptr kShadowBeg     = 0x200000000000ull;
+  static const uptr kShadowEnd     = 0x238000000000ull;
+  static const uptr kAppMemBeg     = 0x000000001000ull;
+  static const uptr kAppMemEnd     = 0x00e000000000ull;
 };
 
 #elif defined(SANITIZER_GO) && SANITIZER_WINDOWS
@@ -210,77 +288,470 @@
 07d0 0000 0000 - 8000 0000 0000: -
 */
 
-const uptr kMetaShadowBeg = 0x076000000000ull;
-const uptr kMetaShadowEnd = 0x07d000000000ull;
-const uptr kTraceMemBeg   = 0x056000000000ull;
-const uptr kTraceMemEnd   = 0x076000000000ull;
-const uptr kShadowBeg     = 0x010000000000ull;
-const uptr kShadowEnd     = 0x050000000000ull;
-const uptr kAppMemBeg     = 0x000000001000ull;
-const uptr kAppMemEnd     = 0x00e000000000ull;
-
-ALWAYS_INLINE
-bool IsAppMem(uptr mem) {
-  return mem >= kAppMemBeg && mem < kAppMemEnd;
+struct Mapping {
+  static const uptr kMetaShadowBeg = 0x076000000000ull;
+  static const uptr kMetaShadowEnd = 0x07d000000000ull;
+  static const uptr kTraceMemBeg   = 0x056000000000ull;
+  static const uptr kTraceMemEnd   = 0x076000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x050000000000ull;
+  static const uptr kAppMemBeg     = 0x000000001000ull;
+  static const uptr kAppMemEnd     = 0x00e000000000ull;
-}
+};
 
-ALWAYS_INLINE
-bool IsShadowMem(uptr mem) {
-  return mem >= kShadowBeg && mem <= kShadowEnd;
-}
-
-ALWAYS_INLINE
-bool IsMetaMem(uptr mem) {
-  return mem >= kMetaShadowBeg && mem <= kMetaShadowEnd;
-}
-
-ALWAYS_INLINE
-uptr MemToShadow(uptr x) {
-  DCHECK(IsAppMem(x));
-  return ((x & ~(kShadowCell - 1)) * kShadowCnt) + kShadowBeg;
-}
-
-ALWAYS_INLINE
-u32 *MemToMeta(uptr x) {
-  DCHECK(IsAppMem(x));
-  return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
-      kMetaShadowCell * kMetaShadowSize) | kMetaShadowBeg);
-}
-
-ALWAYS_INLINE
-uptr ShadowToMem(uptr s) {
-  CHECK(IsShadowMem(s));
-  // FIXME(dvyukov): this is most likely wrong as the mapping is not bijection.
-  return (s - kShadowBeg) / kShadowCnt;
-}
-
-static USED uptr UserRegions[] = {
-  kAppMemBeg, kAppMemEnd,
-};
-
 #else
 # error "Unknown platform"
 #endif
 
+
+#ifdef TSAN_RUNTIME_VMA
+extern uptr vmaSize;
+#endif
+
+
+enum MappingType {
+  MAPPING_LO_APP_BEG,
+  MAPPING_LO_APP_END,
+  MAPPING_HI_APP_BEG,
+  MAPPING_HI_APP_END,
+#ifdef TSAN_MID_APP_RANGE
+  MAPPING_MID_APP_BEG,
+  MAPPING_MID_APP_END,
+#endif
+  MAPPING_HEAP_BEG,
+  MAPPING_HEAP_END,
+  MAPPING_APP_BEG,
+  MAPPING_APP_END,
+  MAPPING_SHADOW_BEG,
+  MAPPING_SHADOW_END,
+  MAPPING_META_SHADOW_BEG,
+  MAPPING_META_SHADOW_END,
+  MAPPING_TRACE_BEG,
+  MAPPING_TRACE_END,
+  MAPPING_VDSO_BEG,
+};
+
+template<typename Mapping, int Type>
+uptr MappingImpl(void) {
+  switch (Type) {
+#ifndef SANITIZER_GO
+    case MAPPING_LO_APP_BEG: return Mapping::kLoAppMemBeg;
+    case MAPPING_LO_APP_END: return Mapping::kLoAppMemEnd;
+# ifdef TSAN_MID_APP_RANGE
+    case MAPPING_MID_APP_BEG: return Mapping::kMidAppMemBeg;
+    case MAPPING_MID_APP_END: return Mapping::kMidAppMemEnd;
+# endif
+    case MAPPING_HI_APP_BEG: return Mapping::kHiAppMemBeg;
+    case MAPPING_HI_APP_END: return Mapping::kHiAppMemEnd;
+    case MAPPING_HEAP_BEG: return Mapping::kHeapMemBeg;
+    case MAPPING_HEAP_END: return Mapping::kHeapMemEnd;
+    case MAPPING_VDSO_BEG: return Mapping::kVdsoBeg;
+#else
+    case MAPPING_APP_BEG: return Mapping::kAppMemBeg;
+    case MAPPING_APP_END: return Mapping::kAppMemEnd;
+#endif
+    case MAPPING_SHADOW_BEG: return Mapping::kShadowBeg;
+    case MAPPING_SHADOW_END: return Mapping::kShadowEnd;
+    case MAPPING_META_SHADOW_BEG: return Mapping::kMetaShadowBeg;
+    case MAPPING_META_SHADOW_END: return Mapping::kMetaShadowEnd;
+    case MAPPING_TRACE_BEG: return Mapping::kTraceMemBeg;
+    case MAPPING_TRACE_END: return Mapping::kTraceMemEnd;
+  }
+}
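+// Since Type is a template (compile-time) parameter, the switch in
+// MappingImpl() folds away and each call compiles down to a single constant.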
+
+template<int Type>
+uptr MappingArchImpl(void) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return MappingImpl<Mapping39, Type>();
+  else
+    return MappingImpl<Mapping42, Type>();
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return MappingImpl<Mapping44, Type>();
+  else
+    return MappingImpl<Mapping46, Type>();
+  DCHECK(0);
+#else
+  return MappingImpl<Mapping, Type>();
+#endif
+}
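+// On AArch64 and PPC64 the active Mapping is selected at run time from
+// vmaSize (detected in InitializePlatformEarly()); on all other platforms it
+// is fixed at compile time.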
+
+#ifndef SANITIZER_GO
+ALWAYS_INLINE
+uptr LoAppMemBeg(void) {
+  return MappingArchImpl<MAPPING_LO_APP_BEG>();
+}
+ALWAYS_INLINE
+uptr LoAppMemEnd(void) {
+  return MappingArchImpl<MAPPING_LO_APP_END>();
+}
+
+#ifdef TSAN_MID_APP_RANGE
+ALWAYS_INLINE
+uptr MidAppMemBeg(void) {
+  return MappingArchImpl<MAPPING_MID_APP_BEG>();
+}
+ALWAYS_INLINE
+uptr MidAppMemEnd(void) {
+  return MappingArchImpl<MAPPING_MID_APP_END>();
+}
+#endif
+
+ALWAYS_INLINE
+uptr HeapMemBeg(void) {
+  return MappingArchImpl<MAPPING_HEAP_BEG>();
+}
+ALWAYS_INLINE
+uptr HeapMemEnd(void) {
+  return MappingArchImpl<MAPPING_HEAP_END>();
+}
+
+ALWAYS_INLINE
+uptr HiAppMemBeg(void) {
+  return MappingArchImpl<MAPPING_HI_APP_BEG>();
+}
+ALWAYS_INLINE
+uptr HiAppMemEnd(void) {
+  return MappingArchImpl<MAPPING_HI_APP_END>();
+}
+
+ALWAYS_INLINE
+uptr VdsoBeg(void) {
+  return MappingArchImpl<MAPPING_VDSO_BEG>();
+}
+
+#else
+
+ALWAYS_INLINE
+uptr AppMemBeg(void) {
+  return MappingArchImpl<MAPPING_APP_BEG>();
+}
+ALWAYS_INLINE
+uptr AppMemEnd(void) {
+  return MappingArchImpl<MAPPING_APP_END>();
+}
+
+#endif
+
+static inline
+bool GetUserRegion(int i, uptr *start, uptr *end) {
+  switch (i) {
+  default:
+    return false;
+#ifndef SANITIZER_GO
+  case 0:
+    *start = LoAppMemBeg();
+    *end = LoAppMemEnd();
+    return true;
+  case 1:
+    *start = HiAppMemBeg();
+    *end = HiAppMemEnd();
+    return true;
+  case 2:
+    *start = HeapMemBeg();
+    *end = HeapMemEnd();
+    return true;
+# ifdef TSAN_MID_APP_RANGE
+  case 3:
+    *start = MidAppMemBeg();
+    *end = MidAppMemEnd();
+    return true;
+# endif
+#else
+  case 0:
+    *start = AppMemBeg();
+    *end = AppMemEnd();
+    return true;
+#endif
+  }
+}
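+// Example usage (a sketch; Process() is a hypothetical callback):
+//   uptr beg, end;
+//   for (int i = 0; GetUserRegion(i, &beg, &end); i++)
+//     Process(beg, end);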
+
+ALWAYS_INLINE
+uptr ShadowBeg(void) {
+  return MappingArchImpl<MAPPING_SHADOW_BEG>();
+}
+ALWAYS_INLINE
+uptr ShadowEnd(void) {
+  return MappingArchImpl<MAPPING_SHADOW_END>();
+}
+
+ALWAYS_INLINE
+uptr MetaShadowBeg(void) {
+  return MappingArchImpl<MAPPING_META_SHADOW_BEG>();
+}
+ALWAYS_INLINE
+uptr MetaShadowEnd(void) {
+  return MappingArchImpl<MAPPING_META_SHADOW_END>();
+}
+
+ALWAYS_INLINE
+uptr TraceMemBeg(void) {
+  return MappingArchImpl<MAPPING_TRACE_BEG>();
+}
+ALWAYS_INLINE
+uptr TraceMemEnd(void) {
+  return MappingArchImpl<MAPPING_TRACE_END>();
+}
+
+
+template<typename Mapping>
+bool IsAppMemImpl(uptr mem) {
+#ifndef SANITIZER_GO
+  return (mem >= Mapping::kHeapMemBeg && mem < Mapping::kHeapMemEnd) ||
+# ifdef TSAN_MID_APP_RANGE
+         (mem >= Mapping::kMidAppMemBeg && mem < Mapping::kMidAppMemEnd) ||
+# endif
+         (mem >= Mapping::kLoAppMemBeg && mem < Mapping::kLoAppMemEnd) ||
+         (mem >= Mapping::kHiAppMemBeg && mem < Mapping::kHiAppMemEnd);
+#else
+  return mem >= Mapping::kAppMemBeg && mem < Mapping::kAppMemEnd;
+#endif
+}
+
+ALWAYS_INLINE
+bool IsAppMem(uptr mem) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return IsAppMemImpl<Mapping39>(mem);
+  else
+    return IsAppMemImpl<Mapping42>(mem);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return IsAppMemImpl<Mapping44>(mem);
+  else
+    return IsAppMemImpl<Mapping46>(mem);
+  DCHECK(0);
+#else
+  return IsAppMemImpl<Mapping>(mem);
+#endif
+}
+
+
+template<typename Mapping>
+bool IsShadowMemImpl(uptr mem) {
+  return mem >= Mapping::kShadowBeg && mem <= Mapping::kShadowEnd;
+}
+
+ALWAYS_INLINE
+bool IsShadowMem(uptr mem) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return IsShadowMemImpl<Mapping39>(mem);
+  else
+    return IsShadowMemImpl<Mapping42>(mem);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return IsShadowMemImpl<Mapping44>(mem);
+  else
+    return IsShadowMemImpl<Mapping46>(mem);
+  DCHECK(0);
+#else
+  return IsShadowMemImpl<Mapping>(mem);
+#endif
+}
+
+
+template<typename Mapping>
+bool IsMetaMemImpl(uptr mem) {
+  return mem >= Mapping::kMetaShadowBeg && mem <= Mapping::kMetaShadowEnd;
+}
+
+ALWAYS_INLINE
+bool IsMetaMem(uptr mem) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return IsMetaMemImpl<Mapping39>(mem);
+  else
+    return IsMetaMemImpl<Mapping42>(mem);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return IsMetaMemImpl<Mapping44>(mem);
+  else
+    return IsMetaMemImpl<Mapping46>(mem);
+  DCHECK(0);
+#else
+  return IsMetaMemImpl<Mapping>(mem);
+#endif
+}
+
+
+template<typename Mapping>
+uptr MemToShadowImpl(uptr x) {
+  DCHECK(IsAppMem(x));
+#ifndef SANITIZER_GO
+  return (((x) & ~(Mapping::kAppMemMsk | (kShadowCell - 1)))
+      ^ Mapping::kAppMemXor) * kShadowCnt;
+#else
+  return ((x & ~(kShadowCell - 1)) * kShadowCnt) | Mapping::kShadowBeg;
+#endif
+}
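+// In C/C++ mode the address is first truncated to its kShadowCell-aligned
+// cell, the arch-specific mask/xor folds the disjoint app ranges into one
+// linear range, and the multiplication by kShadowCnt leaves room for
+// kShadowCnt shadow values per application cell.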
+
+ALWAYS_INLINE
+uptr MemToShadow(uptr x) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return MemToShadowImpl<Mapping39>(x);
+  else
+    return MemToShadowImpl<Mapping42>(x);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return MemToShadowImpl<Mapping44>(x);
+  else
+    return MemToShadowImpl<Mapping46>(x);
+  DCHECK(0);
+#else
+  return MemToShadowImpl<Mapping>(x);
+#endif
+}
+
+
+template<typename Mapping>
+u32 *MemToMetaImpl(uptr x) {
+  DCHECK(IsAppMem(x));
+#ifndef SANITIZER_GO
+  return (u32*)(((((x) & ~(Mapping::kAppMemMsk | (kMetaShadowCell - 1)))
+        ^ Mapping::kAppMemXor) / kMetaShadowCell * kMetaShadowSize)
+          | Mapping::kMetaShadowBeg);
+#else
+  return (u32*)(((x & ~(kMetaShadowCell - 1)) / \
+      kMetaShadowCell * kMetaShadowSize) | Mapping::kMetaShadowBeg);
+#endif
+}
+
+ALWAYS_INLINE
+u32 *MemToMeta(uptr x) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return MemToMetaImpl<Mapping39>(x);
+  else
+    return MemToMetaImpl<Mapping42>(x);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return MemToMetaImpl<Mapping44>(x);
+  else
+    return MemToMetaImpl<Mapping46>(x);
+  DCHECK(0);
+#else
+  return MemToMetaImpl<Mapping>(x);
+#endif
+}
+
+
+template<typename Mapping>
+uptr ShadowToMemImpl(uptr s) {
+  DCHECK(IsShadowMem(s));
+#ifndef SANITIZER_GO
+  if (s >= MemToShadow(Mapping::kLoAppMemBeg)
+      && s <= MemToShadow(Mapping::kLoAppMemEnd - 1))
+    return (s / kShadowCnt) ^ Mapping::kAppMemXor;
+# ifdef TSAN_MID_APP_RANGE
+  if (s >= MemToShadow(Mapping::kMidAppMemBeg)
+      && s <= MemToShadow(Mapping::kMidAppMemEnd - 1))
+    return ((s / kShadowCnt) ^ Mapping::kAppMemXor) + Mapping::kMidShadowOff;
+# endif
+  else
+    return ((s / kShadowCnt) ^ Mapping::kAppMemXor) | Mapping::kAppMemMsk;
+#else
+# ifndef SANITIZER_WINDOWS
+  return (s & ~Mapping::kShadowBeg) / kShadowCnt;
+# else
+  // FIXME(dvyukov): this is most likely wrong as the mapping is not bijection.
+  return (s - Mapping::kShadowBeg) / kShadowCnt;
+# endif // SANITIZER_WINDOWS
+#endif
+}
+
+ALWAYS_INLINE
+uptr ShadowToMem(uptr s) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return ShadowToMemImpl<Mapping39>(s);
+  else
+    return ShadowToMemImpl<Mapping42>(s);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return ShadowToMemImpl<Mapping44>(s);
+  else
+    return ShadowToMemImpl<Mapping46>(s);
+  DCHECK(0);
+#else
+  return ShadowToMemImpl<Mapping>(s);
+#endif
+}
+
+
+
 // The additional page is to catch shadow stack overflow as paging fault.
 // Windows wants 64K alignment for mmaps.
 const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace)
     + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1);
 
-uptr ALWAYS_INLINE GetThreadTrace(int tid) {
-  uptr p = kTraceMemBeg + (uptr)tid * kTotalTraceSize;
-  DCHECK_LT(p, kTraceMemEnd);
+template<typename Mapping>
+uptr GetThreadTraceImpl(int tid) {
+  uptr p = Mapping::kTraceMemBeg + (uptr)tid * kTotalTraceSize;
+  DCHECK_LT(p, Mapping::kTraceMemEnd);
   return p;
 }
 
-uptr ALWAYS_INLINE GetThreadTraceHeader(int tid) {
-  uptr p = kTraceMemBeg + (uptr)tid * kTotalTraceSize
+ALWAYS_INLINE
+uptr GetThreadTrace(int tid) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return GetThreadTraceImpl<Mapping39>(tid);
+  else
+    return GetThreadTraceImpl<Mapping42>(tid);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return GetThreadTraceImpl<Mapping44>(tid);
+  else
+    return GetThreadTraceImpl<Mapping46>(tid);
+  DCHECK(0);
+#else
+  return GetThreadTraceImpl<Mapping>(tid);
+#endif
+}
+
+
+template<typename Mapping>
+uptr GetThreadTraceHeaderImpl(int tid) {
+  uptr p = Mapping::kTraceMemBeg + (uptr)tid * kTotalTraceSize
       + kTraceSize * sizeof(Event);
-  DCHECK_LT(p, kTraceMemEnd);
+  DCHECK_LT(p, Mapping::kTraceMemEnd);
   return p;
 }
 
+ALWAYS_INLINE
+uptr GetThreadTraceHeader(int tid) {
+#ifdef __aarch64__
+  if (vmaSize == 39)
+    return GetThreadTraceHeaderImpl<Mapping39>(tid);
+  else
+    return GetThreadTraceHeaderImpl<Mapping42>(tid);
+  DCHECK(0);
+#elif defined(__powerpc64__)
+  if (vmaSize == 44)
+    return GetThreadTraceHeaderImpl<Mapping44>(tid);
+  else
+    return GetThreadTraceHeaderImpl<Mapping46>(tid);
+  DCHECK(0);
+#else
+  return GetThreadTraceHeaderImpl<Mapping>(tid);
+#endif
+}
+
 void InitializePlatform();
+void InitializePlatformEarly();
+void CheckAndProtect();
+void InitializeShadowMemoryPlatform();
 void FlushShadowMemory();
 void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive);
 
@@ -294,6 +765,8 @@
     void *abstime), void *c, void *m, void *abstime,
     void(*cleanup)(void *arg), void *arg);
 
+void DestroyThreadState();
+
 }  // namespace __tsan
 
 #endif  // TSAN_PLATFORM_H
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 0359f5e..6602561 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -67,6 +67,11 @@
 static uptr g_data_start;
 static uptr g_data_end;
 
+#ifdef TSAN_RUNTIME_VMA
+// Runtime detected VMA size.
+uptr vmaSize;
+#endif
+
 enum {
   MemTotal  = 0,
   MemShadow = 1,
@@ -82,29 +87,30 @@
 void FillProfileCallback(uptr p, uptr rss, bool file,
                          uptr *mem, uptr stats_size) {
   mem[MemTotal] += rss;
-  if (p >= kShadowBeg && p < kShadowEnd)
+  if (p >= ShadowBeg() && p < ShadowEnd())
     mem[MemShadow] += rss;
-  else if (p >= kMetaShadowBeg && p < kMetaShadowEnd)
+  else if (p >= MetaShadowBeg() && p < MetaShadowEnd())
     mem[MemMeta] += rss;
 #ifndef SANITIZER_GO
-  else if (p >= kHeapMemBeg && p < kHeapMemEnd)
+  else if (p >= HeapMemBeg() && p < HeapMemEnd())
     mem[MemHeap] += rss;
-  else if (p >= kLoAppMemBeg && p < kLoAppMemEnd)
+  else if (p >= LoAppMemBeg() && p < LoAppMemEnd())
     mem[file ? MemFile : MemMmap] += rss;
-  else if (p >= kHiAppMemBeg && p < kHiAppMemEnd)
+  else if (p >= HiAppMemBeg() && p < HiAppMemEnd())
     mem[file ? MemFile : MemMmap] += rss;
 #else
-  else if (p >= kAppMemBeg && p < kAppMemEnd)
+  else if (p >= AppMemBeg() && p < AppMemEnd())
     mem[file ? MemFile : MemMmap] += rss;
 #endif
-  else if (p >= kTraceMemBeg && p < kTraceMemEnd)
+  else if (p >= TraceMemBeg() && p < TraceMemEnd())
     mem[MemTrace] += rss;
   else
     mem[MemOther] += rss;
 }
 
 void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
-  uptr mem[MemCount] = {};
+  uptr mem[MemCount];
+  internal_memset(mem, 0, sizeof(mem[0]) * MemCount);
   __sanitizer::GetMemoryProfile(FillProfileCallback, mem, 7);
   StackDepotStats *stacks = StackDepotGetStats();
   internal_snprintf(buf, buf_size,
@@ -121,7 +127,7 @@
 void FlushShadowMemoryCallback(
     const SuspendedThreadsList &suspended_threads_list,
     void *argument) {
-  FlushUnneededShadowMemory(kShadowBeg, kShadowEnd - kShadowBeg);
+  FlushUnneededShadowMemory(ShadowBeg(), ShadowEnd() - ShadowBeg());
 }
 #endif
 
@@ -132,17 +138,6 @@
 }
 
 #ifndef SANITIZER_GO
-static void ProtectRange(uptr beg, uptr end) {
-  CHECK_LE(beg, end);
-  if (beg == end)
-    return;
-  if (beg != (uptr)MmapNoAccess(beg, end - beg)) {
-    Printf("FATAL: ThreadSanitizer can not protect [%zx,%zx]\n", beg, end);
-    Printf("FATAL: Make sure you are not using unlimited stack\n");
-    Die();
-  }
-}
-
 // Mark shadow for .rodata sections with the special kShadowRodata marker.
 // Accesses to .rodata can't race, so this saves time, memory and trace space.
 static void MapRodata() {
@@ -200,51 +195,7 @@
   internal_close(fd);
 }
 
-void InitializeShadowMemory() {
-  // Map memory shadow.
-  uptr shadow =
-      (uptr)MmapFixedNoReserve(kShadowBeg, kShadowEnd - kShadowBeg, "shadow");
-  if (shadow != kShadowBeg) {
-    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
-    Printf("FATAL: Make sure to compile with -fPIE and "
-               "to link with -pie (%p, %p).\n", shadow, kShadowBeg);
-    Die();
-  }
-  // This memory range is used for thread stacks and large user mmaps.
-  // Frequently a thread uses only a small part of stack and similarly
-  // a program uses a small part of large mmap. On some programs
-  // we see 20% memory usage reduction without huge pages for this range.
-  // FIXME: don't use constants here.
-#if defined(__x86_64__)
-  const uptr kMadviseRangeBeg  = 0x7f0000000000ull;
-  const uptr kMadviseRangeSize = 0x010000000000ull;
-#elif defined(__mips64)
-  const uptr kMadviseRangeBeg  = 0xff00000000ull;
-  const uptr kMadviseRangeSize = 0x0100000000ull;
-#endif
-  NoHugePagesInRegion(MemToShadow(kMadviseRangeBeg),
-                      kMadviseRangeSize * kShadowMultiplier);
-  if (common_flags()->use_madv_dontdump)
-    DontDumpShadowMemory(kShadowBeg, kShadowEnd - kShadowBeg);
-  DPrintf("memory shadow: %zx-%zx (%zuGB)\n",
-      kShadowBeg, kShadowEnd,
-      (kShadowEnd - kShadowBeg) >> 30);
-
-  // Map meta shadow.
-  uptr meta_size = kMetaShadowEnd - kMetaShadowBeg;
-  uptr meta =
-      (uptr)MmapFixedNoReserve(kMetaShadowBeg, meta_size, "meta shadow");
-  if (meta != kMetaShadowBeg) {
-    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
-    Printf("FATAL: Make sure to compile with -fPIE and "
-               "to link with -pie (%p, %p).\n", meta, kMetaShadowBeg);
-    Die();
-  }
-  if (common_flags()->use_madv_dontdump)
-    DontDumpShadowMemory(meta, meta_size);
-  DPrintf("meta shadow: %zx-%zx (%zuGB)\n",
-      meta, meta + meta_size, meta_size >> 30);
-
+void InitializeShadowMemoryPlatform() {
   MapRodata();
 }
 
@@ -288,32 +239,27 @@
   CHECK_LT((uptr)&g_data_start, g_data_end);
 }
 
-static void CheckAndProtect() {
-  // Ensure that the binary is indeed compiled with -pie.
-  MemoryMappingLayout proc_maps(true);
-  uptr p, end;
-  while (proc_maps.Next(&p, &end, 0, 0, 0, 0)) {
-    if (IsAppMem(p))
-      continue;
-    if (p >= kHeapMemEnd &&
-        p < HeapEnd())
-      continue;
-    if (p >= kVdsoBeg)  // vdso
-      break;
-    Printf("FATAL: ThreadSanitizer: unexpected memory mapping %p-%p\n", p, end);
+#endif  // #ifndef SANITIZER_GO
+
+void InitializePlatformEarly() {
+#ifdef TSAN_RUNTIME_VMA
+  vmaSize =
+    (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
+#if defined(__aarch64__)
+  if (vmaSize != 39 && vmaSize != 42) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %d - Supported 39 and 42\n", vmaSize);
     Die();
   }
-
-  ProtectRange(kLoAppMemEnd, kShadowBeg);
-  ProtectRange(kShadowEnd, kMetaShadowBeg);
-  ProtectRange(kMetaShadowEnd, kTraceMemBeg);
-  // Memory for traces is mapped lazily in MapThreadTrace.
-  // Protect the whole range for now, so that user does not map something here.
-  ProtectRange(kTraceMemBeg, kTraceMemEnd);
-  ProtectRange(kTraceMemEnd, kHeapMemBeg);
-  ProtectRange(HeapEnd(), kHiAppMemBeg);
+#elif defined(__powerpc64__)
+  if (vmaSize != 44 && vmaSize != 46) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %d - Supported 44 and 46\n", vmaSize);
+    Die();
+  }
+#endif
+#endif
 }
-#endif  // #ifndef SANITIZER_GO
 
 void InitializePlatform() {
   DisableCoreDumperIfNecessary();
@@ -363,7 +309,7 @@
 // This is required to properly "close" the fds, because we do not see internal
 // closes within glibc. The code is a pure hack.
 int ExtractResolvFDs(void *state, int *fds, int nfd) {
-#if SANITIZER_LINUX
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
   int cnt = 0;
   __res_state *statp = (__res_state*)state;
   for (int i = 0; i < MAXNS && cnt < nfd; i++) {
@@ -411,6 +357,10 @@
 }
 #endif
 
+#ifndef SANITIZER_GO
+void ReplaceSystemMalloc() { }
+#endif
+
 }  // namespace __tsan
 
 #endif  // SANITIZER_LINUX || SANITIZER_FREEBSD
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index b72d9b0..31caf37 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -15,8 +15,10 @@
 #include "sanitizer_common/sanitizer_platform.h"
 #if SANITIZER_MAC
 
+#include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_posix.h"
 #include "sanitizer_common/sanitizer_procmaps.h"
 #include "tsan_platform.h"
 #include "tsan_rtl.h"
@@ -40,6 +42,62 @@
 
 namespace __tsan {
 
+#ifndef SANITIZER_GO
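+// Returns the pointer stored at *dst or, if it is still null, mmaps a fresh
+// zero-filled region of `size` bytes and atomically publishes it there. If
+// two threads race, the compare-exchange lets only one mapping win; the
+// loser munmaps its copy and returns the winner's. Only atomics and raw
+// syscalls are used, keeping this safe to call from a signal handler.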
+static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) {
+  atomic_uintptr_t *a = (atomic_uintptr_t *)dst;
+  void *val = (void *)atomic_load_relaxed(a);
+  atomic_signal_fence(memory_order_acquire);  // Turns the previous load into
+                                              // acquire wrt signals.
+  if (UNLIKELY(val == nullptr)) {
+    val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE,
+                                MAP_PRIVATE | MAP_ANON, -1, 0);
+    CHECK(val);
+    void *cmp = nullptr;
+    if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val,
+                                        memory_order_acq_rel)) {
+      internal_munmap(val, size);
+      val = cmp;
+    }
+  }
+  return val;
+}
+
+// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is
+// problematic, because there are several places where interceptors are called
+// when TLVs are not accessible (early process startup, thread cleanup, ...).
+// The following provides a "poor man's TLV" implementation, where we use the
+// shadow memory of the pointer returned by pthread_self() to store a pointer to
+// the ThreadState object. The main thread's ThreadState pointer is stored
+// separately in a static variable, because we need to access it even before the
+// shadow memory is set up.
+static uptr main_thread_identity = 0;
+static ThreadState *main_thread_state = nullptr;
+
+ThreadState *cur_thread() {
+  ThreadState **fake_tls;
+  uptr thread_identity = (uptr)pthread_self();
+  if (thread_identity == main_thread_identity || main_thread_identity == 0) {
+    fake_tls = &main_thread_state;
+  } else {
+    fake_tls = (ThreadState **)MemToShadow(thread_identity);
+  }
+  ThreadState *thr = (ThreadState *)SignalSafeGetOrAllocate(
+      (uptr *)fake_tls, sizeof(ThreadState));
+  return thr;
+}
+
+// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call
+// munmap first and then clear `fake_tls`; if we receive a signal in between,
+// the handler will try to access the unmapped ThreadState.
+void cur_thread_finalize() {
+  uptr thread_identity = (uptr)pthread_self();
+  CHECK_NE(thread_identity, main_thread_identity);
+  ThreadState **fake_tls = (ThreadState **)MemToShadow(thread_identity);
+  internal_munmap(*fake_tls, sizeof(ThreadState));
+  *fake_tls = nullptr;
+}
+#endif
+
 uptr GetShadowMemoryConsumption() {
   return 0;
 }
@@ -51,28 +109,62 @@
 }
 
 #ifndef SANITIZER_GO
-void InitializeShadowMemory() {
-  uptr shadow = (uptr)MmapFixedNoReserve(kShadowBeg,
-    kShadowEnd - kShadowBeg);
-  if (shadow != kShadowBeg) {
-    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
-    Printf("FATAL: Make sure to compile with -fPIE and "
-           "to link with -pie.\n");
-    Die();
+void InitializeShadowMemoryPlatform() { }
+
+// On OS X, GCD worker threads are created without a call to pthread_create. We
+// need to properly register these threads with ThreadCreate and ThreadStart.
+// These threads don't have a parent thread, as they are created "spuriously".
+// We're using a libpthread API that notifies us about a newly created thread.
+// The `thread == pthread_self()` check indicates this is actually a worker
+// thread. If it's just a regular thread, this hook is called on the parent
+// thread.
+typedef void (*pthread_introspection_hook_t)(unsigned int event,
+                                             pthread_t thread, void *addr,
+                                             size_t size);
+extern "C" pthread_introspection_hook_t pthread_introspection_hook_install(
+    pthread_introspection_hook_t hook);
+static const uptr PTHREAD_INTROSPECTION_THREAD_CREATE = 1;
+static const uptr PTHREAD_INTROSPECTION_THREAD_TERMINATE = 3;
+static pthread_introspection_hook_t prev_pthread_introspection_hook;
+static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
+                                          void *addr, size_t size) {
+  if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) {
+    if (thread == pthread_self()) {
+      // The current thread is a newly created GCD worker thread.
+      ThreadState *parent_thread_state = nullptr;  // No parent.
+      int tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true);
+      CHECK_NE(tid, 0);
+      ThreadState *thr = cur_thread();
+      ThreadStart(thr, tid, GetTid());
+    }
+  } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
+    if (thread == pthread_self()) {
+      ThreadState *thr = cur_thread();
+      if (thr->tctx) {
+        DestroyThreadState();
+      }
+    }
   }
-  if (common_flags()->use_madv_dontdump)
-    DontDumpShadowMemory(kShadowBeg, kShadowEnd - kShadowBeg);
-  DPrintf("kShadow %zx-%zx (%zuGB)\n",
-      kShadowBeg, kShadowEnd,
-      (kShadowEnd - kShadowBeg) >> 30);
-  DPrintf("kAppMem %zx-%zx (%zuGB)\n",
-      kAppMemBeg, kAppMemEnd,
-      (kAppMemEnd - kAppMemBeg) >> 30);
+
+  if (prev_pthread_introspection_hook != nullptr)
+    prev_pthread_introspection_hook(event, thread, addr, size);
 }
 #endif
 
+void InitializePlatformEarly() {
+}
+
 void InitializePlatform() {
   DisableCoreDumperIfNecessary();
+#ifndef SANITIZER_GO
+  CheckAndProtect();
+
+  CHECK_EQ(main_thread_identity, 0);
+  main_thread_identity = (uptr)pthread_self();
+
+  prev_pthread_introspection_hook =
+      pthread_introspection_hook_install(&my_pthread_introspection_hook);
+#endif
 }
 
 #ifndef SANITIZER_GO
@@ -91,6 +183,10 @@
 }
 #endif
 
+bool IsGlobalVar(uptr addr) {
+  return false;
+}
+
 }  // namespace __tsan
 
 #endif  // SANITIZER_MAC
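
The SignalSafeGetOrAllocate helper above is the core of the fake-TLS scheme: a relaxed load plus a signal fence on the fast path, and an mmap-then-compare-exchange race on the slow path where the loser unmaps its page. Below is a minimal standalone sketch of the same publish-once pattern using plain POSIX mmap and C++11 atomics instead of the runtime's internal_mmap and sanitizer atomics; GetOrAllocate and slot are illustrative names, not runtime API.

    #include <atomic>
    #include <cstddef>
    #include <cstdio>
    #include <sys/mman.h>

    // Illustrative names, not runtime API.
    static void *GetOrAllocate(std::atomic<void *> *slot, size_t size) {
      // Fast path: relaxed load + signal fence, ordered with respect to a
      // signal handler running on this same thread, without a full barrier.
      void *val = slot->load(std::memory_order_relaxed);
      std::atomic_signal_fence(std::memory_order_acquire);
      if (val == nullptr) {
        void *fresh = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANON, -1, 0);
        if (fresh == MAP_FAILED) return nullptr;  // the runtime CHECKs instead
        void *expected = nullptr;
        if (slot->compare_exchange_strong(expected, fresh,
                                          std::memory_order_acq_rel)) {
          val = fresh;           // We won the race: our mapping is published.
        } else {
          munmap(fresh, size);   // Lost the race: drop ours, use the winner's.
          val = expected;
        }
      }
      return val;
    }

    int main() {
      static std::atomic<void *> slot{nullptr};
      void *a = GetOrAllocate(&slot, 4096);
      void *b = GetOrAllocate(&slot, 4096);
      printf("%p %p same=%d\n", a, b, a == b);  // same pointer both times
      return 0;
    }
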
diff --git a/lib/tsan/rtl/tsan_platform_posix.cc b/lib/tsan/rtl/tsan_platform_posix.cc
new file mode 100644
index 0000000..90476cb
--- /dev/null
+++ b/lib/tsan/rtl/tsan_platform_posix.cc
@@ -0,0 +1,151 @@
+//===-- tsan_platform_posix.cc --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// POSIX-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_POSIX
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+#ifndef SANITIZER_GO
+void InitializeShadowMemory() {
+  // Map memory shadow.
+  uptr shadow =
+      (uptr)MmapFixedNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(),
+                               "shadow");
+  if (shadow != ShadowBeg()) {
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and "
+               "to link with -pie (%p, %p).\n", shadow, ShadowBeg());
+    Die();
+  }
+  // This memory range is used for thread stacks and large user mmaps.
+  // Frequently a thread uses only a small part of its stack, and similarly
+  // a program uses only a small part of a large mmap. On some programs
+  // we see a 20% reduction in memory usage without huge pages for this range.
+  // FIXME: don't use constants here.
+#if defined(__x86_64__)
+  const uptr kMadviseRangeBeg  = 0x7f0000000000ull;
+  const uptr kMadviseRangeSize = 0x010000000000ull;
+#elif defined(__mips64)
+  const uptr kMadviseRangeBeg  = 0xff00000000ull;
+  const uptr kMadviseRangeSize = 0x0100000000ull;
+#elif defined(__aarch64__)
+  uptr kMadviseRangeBeg = 0;
+  uptr kMadviseRangeSize = 0;
+  if (vmaSize == 39) {
+    kMadviseRangeBeg  = 0x7d00000000ull;
+    kMadviseRangeSize = 0x0300000000ull;
+  } else if (vmaSize == 42) {
+    kMadviseRangeBeg  = 0x3f000000000ull;
+    kMadviseRangeSize = 0x01000000000ull;
+  } else {
+    DCHECK(0);
+  }
+#elif defined(__powerpc64__)
+  uptr kMadviseRangeBeg = 0;
+  uptr kMadviseRangeSize = 0;
+  if (vmaSize == 44) {
+    kMadviseRangeBeg  = 0x0f60000000ull;
+    kMadviseRangeSize = 0x0010000000ull;
+  } else if (vmaSize == 46) {
+    kMadviseRangeBeg  = 0x3f0000000000ull;
+    kMadviseRangeSize = 0x010000000000ull;
+  } else {
+    DCHECK(0);
+  }
+#endif
+  NoHugePagesInRegion(MemToShadow(kMadviseRangeBeg),
+                      kMadviseRangeSize * kShadowMultiplier);
+  // Meta shadow is compressing and we don't flush it,
+  // so it makes sense to mark it as NOHUGEPAGE to avoid over-allocating memory.
+  // On one program this reduces memory consumption from 5GB to 2.5GB.
+  NoHugePagesInRegion(MetaShadowBeg(), MetaShadowEnd() - MetaShadowBeg());
+  if (common_flags()->use_madv_dontdump)
+    DontDumpShadowMemory(ShadowBeg(), ShadowEnd() - ShadowBeg());
+  DPrintf("memory shadow: %zx-%zx (%zuGB)\n",
+      ShadowBeg(), ShadowEnd(),
+      (ShadowEnd() - ShadowBeg()) >> 30);
+
+  // Map meta shadow.
+  uptr meta_size = MetaShadowEnd() - MetaShadowBeg();
+  uptr meta =
+      (uptr)MmapFixedNoReserve(MetaShadowBeg(), meta_size, "meta shadow");
+  if (meta != MetaShadowBeg()) {
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and "
+               "to link with -pie (%p, %p).\n", meta, MetaShadowBeg());
+    Die();
+  }
+  if (common_flags()->use_madv_dontdump)
+    DontDumpShadowMemory(meta, meta_size);
+  DPrintf("meta shadow: %zx-%zx (%zuGB)\n",
+      meta, meta + meta_size, meta_size >> 30);
+
+  InitializeShadowMemoryPlatform();
+}
+
+static void ProtectRange(uptr beg, uptr end) {
+  CHECK_LE(beg, end);
+  if (beg == end)
+    return;
+  if (beg != (uptr)MmapNoAccess(beg, end - beg)) {
+    Printf("FATAL: ThreadSanitizer can not protect [%zx,%zx]\n", beg, end);
+    Printf("FATAL: Make sure you are not using unlimited stack\n");
+    Die();
+  }
+}
+
+void CheckAndProtect() {
+  // Ensure that the binary is indeed compiled with -pie.
+  MemoryMappingLayout proc_maps(true);
+  uptr p, end, prot;
+  while (proc_maps.Next(&p, &end, 0, 0, 0, &prot)) {
+    if (IsAppMem(p))
+      continue;
+    if (p >= HeapMemEnd() &&
+        p < HeapEnd())
+      continue;
+    if (prot == 0)  // Zero page or mprotected.
+      continue;
+    if (p >= VdsoBeg())  // vdso
+      break;
+    Printf("FATAL: ThreadSanitizer: unexpected memory mapping %p-%p\n", p, end);
+    Die();
+  }
+
+  ProtectRange(LoAppMemEnd(), ShadowBeg());
+  ProtectRange(ShadowEnd(), MetaShadowBeg());
+#ifdef TSAN_MID_APP_RANGE
+  ProtectRange(MetaShadowEnd(), MidAppMemBeg());
+  ProtectRange(MidAppMemEnd(), TraceMemBeg());
+#else
+  ProtectRange(MetaShadowEnd(), TraceMemBeg());
+#endif
+  // Memory for traces is mapped lazily in MapThreadTrace.
+  // Protect the whole range for now, so that the user does not map anything here.
+  ProtectRange(TraceMemBeg(), TraceMemEnd());
+  ProtectRange(TraceMemEnd(), HeapMemBeg());
+  ProtectRange(HeapEnd(), HiAppMemBeg());
+}
+#endif
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_POSIX
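
CheckAndProtect above walks the process mappings to verify that -pie actually took effect, and ProtectRange then plugs every gap in the TSan address-space layout with a no-access mapping so the application cannot mmap into it. A rough illustration of the reservation idea with plain mmap follows; the real code maps at fixed addresses via MmapNoAccess, so the nullptr hint here is a simplification.

    #include <cstdio>
    #include <sys/mman.h>

    int main() {
      const size_t kSize = 1 << 20;
      // A PROT_NONE anonymous mapping occupies the range without committing
      // memory; any access faults, and the kernel will not place other
      // mappings inside it.
      void *gap = mmap(nullptr, kSize, PROT_NONE,
                       MAP_PRIVATE | MAP_ANON, -1, 0);
      if (gap == MAP_FAILED) { perror("mmap"); return 1; }
      printf("reserved %zu bytes at %p\n", kSize, gap);
      munmap(gap, kSize);
      return 0;
    }
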
diff --git a/lib/tsan/rtl/tsan_platform_windows.cc b/lib/tsan/rtl/tsan_platform_windows.cc
index cfbe77d..c6d5058 100644
--- a/lib/tsan/rtl/tsan_platform_windows.cc
+++ b/lib/tsan/rtl/tsan_platform_windows.cc
@@ -31,6 +31,9 @@
 void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
 }
 
+void InitializePlatformEarly() {
+}
+
 void InitializePlatform() {
 }
 
diff --git a/lib/tsan/rtl/tsan_ppc_regs.h b/lib/tsan/rtl/tsan_ppc_regs.h
new file mode 100644
index 0000000..5b43f3d
--- /dev/null
+++ b/lib/tsan/rtl/tsan_ppc_regs.h
@@ -0,0 +1,96 @@
+#define r0 0
+#define r1 1
+#define r2 2
+#define r3 3
+#define r4 4
+#define r5 5
+#define r6 6
+#define r7 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define r16 16
+#define r17 17
+#define r18 18
+#define r19 19
+#define r20 20
+#define r21 21
+#define r22 22
+#define r23 23
+#define r24 24
+#define r25 25
+#define r26 26
+#define r27 27
+#define r28 28
+#define r29 29
+#define r30 30
+#define r31 31
+#define f0 0
+#define f1 1
+#define f2 2
+#define f3 3
+#define f4 4
+#define f5 5
+#define f6 6
+#define f7 7
+#define f8 8
+#define f9 9
+#define f10 10
+#define f11 11
+#define f12 12
+#define f13 13
+#define f14 14
+#define f15 15
+#define f16 16
+#define f17 17
+#define f18 18
+#define f19 19
+#define f20 20
+#define f21 21
+#define f22 22
+#define f23 23
+#define f24 24
+#define f25 25
+#define f26 26
+#define f27 27
+#define f28 28
+#define f29 29
+#define f30 30
+#define f31 31
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index f4b0687..c1d2fd0 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -111,6 +111,12 @@
   return "";
 }
 
+#if SANITIZER_MAC
+static const char *const kInterposedFunctionPrefix = "wrap_";
+#else
+static const char *const kInterposedFunctionPrefix = "__interceptor_";
+#endif
+
 void PrintStack(const ReportStack *ent) {
   if (ent == 0 || ent->frames == 0) {
     Printf("    [failed to restore the stack]\n\n");
@@ -121,7 +127,7 @@
     InternalScopedString res(2 * GetPageSizeCached());
     RenderFrame(&res, common_flags()->stack_trace_format, i, frame->info,
                 common_flags()->symbolize_vs_style,
-                common_flags()->strip_path_prefix, "__interceptor_");
+                common_flags()->strip_path_prefix, kInterposedFunctionPrefix);
     Printf("%s\n", res.data());
   }
   Printf("\n");
@@ -165,9 +171,14 @@
   Printf("%s", d.Location());
   if (loc->type == ReportLocationGlobal) {
     const DataInfo &global = loc->global;
-    Printf("  Location is global '%s' of size %zu at %p (%s+%p)\n\n",
-           global.name, global.size, global.start,
-           StripModuleName(global.module), global.module_offset);
+    if (global.size != 0)
+      Printf("  Location is global '%s' of size %zu at %p (%s+%p)\n\n",
+             global.name, global.size, global.start,
+             StripModuleName(global.module), global.module_offset);
+    else
+      Printf("  Location is global '%s' at %p (%s+%p)\n\n", global.name,
+             global.start, StripModuleName(global.module),
+             global.module_offset);
   } else if (loc->type == ReportLocationHeap) {
     char thrbuf[kThreadBufSize];
     Printf("  Location is heap block of size %zu at %p allocated by %s:\n",
@@ -256,10 +267,15 @@
   if (frame == 0)
     return false;
   const char *file = frame->info.file;
-  return file != 0 &&
-         (internal_strstr(file, "tsan_interceptors.cc") ||
-          internal_strstr(file, "sanitizer_common_interceptors.inc") ||
-          internal_strstr(file, "tsan_interface_"));
+  const char *module = frame->info.module;
+  if (file != 0 &&
+      (internal_strstr(file, "tsan_interceptors.cc") ||
+       internal_strstr(file, "sanitizer_common_interceptors.inc") ||
+       internal_strstr(file, "tsan_interface_")))
+    return true;
+  if (module != 0 && (internal_strstr(module, "libclang_rt.tsan_")))
+    return true;
+  return false;
 }
 
 static SymbolizedStack *SkipTsanInternalFrames(SymbolizedStack *frames) {
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 63c356b..4df4db5 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -44,7 +44,7 @@
 
 namespace __tsan {
 
-#ifndef SANITIZER_GO
+#if !defined(SANITIZER_GO) && !SANITIZER_MAC
 THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(64);
 #endif
 static char ctx_placeholder[sizeof(Context)] ALIGNED(64);
@@ -55,12 +55,12 @@
 bool OnFinalize(bool failed);
 void OnInitialize();
 #else
-SANITIZER_INTERFACE_ATTRIBUTE
-bool WEAK OnFinalize(bool failed) {
+SANITIZER_WEAK_CXX_DEFAULT_IMPL
+bool OnFinalize(bool failed) {
   return failed;
 }
-SANITIZER_INTERFACE_ATTRIBUTE
-void WEAK OnInitialize() {}
+SANITIZER_WEAK_CXX_DEFAULT_IMPL
+void OnInitialize() {}
 #endif
 
 static char thread_registry_placeholder[sizeof(ThreadRegistry)];
@@ -99,8 +99,10 @@
   , nmissed_expected()
   , thread_registry(new(thread_registry_placeholder) ThreadRegistry(
       CreateThreadContext, kMaxTid, kThreadQuarantineSize, kMaxTidReuse))
+  , racy_mtx(MutexTypeRacy, StatMtxRacy)
   , racy_stacks(MBlockRacyStacks)
   , racy_addresses(MBlockRacyAddresses)
+  , fired_suppressions_mtx(MutexTypeFired, StatMtxFired)
   , fired_suppressions(8) {
 }
 
@@ -271,8 +273,8 @@
 
 void MapThreadTrace(uptr addr, uptr size, const char *name) {
   DPrintf("#0: Mapping trace at %p-%p(0x%zx)\n", addr, addr + size, size);
-  CHECK_GE(addr, kTraceMemBeg);
-  CHECK_LE(addr + size, kTraceMemEnd);
+  CHECK_GE(addr, TraceMemBeg());
+  CHECK_LE(addr + size, TraceMemEnd());
   CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
   uptr addr1 = (uptr)MmapFixedNoReserve(addr, size, name);
   if (addr1 != addr) {
@@ -283,9 +285,8 @@
 }
 
 static void CheckShadowMapping() {
-  for (uptr i = 0; i < ARRAY_SIZE(UserRegions); i += 2) {
-    const uptr beg = UserRegions[i];
-    const uptr end = UserRegions[i + 1];
+  uptr beg, end;
+  for (int i = 0; GetUserRegion(i, &beg, &end); i++) {
     VPrintf(3, "checking shadow region %p-%p\n", beg, end);
     for (uptr p0 = beg; p0 <= end; p0 += (end - beg) / 4) {
       for (int x = -1; x <= 1; x++) {
@@ -318,10 +319,15 @@
 
   ctx = new(ctx_placeholder) Context;
   const char *options = GetEnv(kTsanOptionsEnv);
-  InitializeFlags(&ctx->flags, options);
   CacheBinaryName();
+  InitializeFlags(&ctx->flags, options);
+  InitializePlatformEarly();
 #ifndef SANITIZER_GO
+  // Re-exec ourselves if we need to set additional env or command line args.
+  MaybeReexec();
+
   InitializeAllocator();
+  ReplaceSystemMalloc();
 #endif
   InitializeInterceptors();
   CheckShadowMapping();
@@ -417,7 +423,7 @@
   StatOutput(ctx->stat);
 #endif
 
-  return failed ? flags()->exitcode : 0;
+  return failed ? common_flags()->exitcode : 0;
 }
 
 #ifndef SANITIZER_GO
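
Several hunks in this change (OnFinalize/OnInitialize here, OnReport and __tsan_default_suppressions below) replace hand-written SANITIZER_INTERFACE_ATTRIBUTE + WEAK pairs with the SANITIZER_WEAK_DEFAULT_IMPL / SANITIZER_WEAK_CXX_DEFAULT_IMPL macros. Either way the mechanism is a weak symbol: the runtime ships a weak no-op default, and a strong definition linked in by the embedding program silently replaces it. A self-contained sketch of that linkage trick, with demo_hook as a made-up name:

    #include <cstdio>

    // Weak default: used only if no strong definition of demo_hook is linked.
    extern "C" __attribute__((weak)) int demo_hook(int x) {
      return x;  // no-op default
    }

    // Another translation unit could provide
    //   extern "C" int demo_hook(int x) { return -x; }
    // and the linker would pick that strong definition over the weak one.

    int main() {
      printf("%d\n", demo_hook(7));  // 7 with the default, -7 if overridden
      return 0;
    }
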
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index a13e4b6..04104b1 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -54,7 +54,7 @@
 
 #ifndef SANITIZER_GO
 struct MapUnmapCallback;
-#ifdef __mips64
+#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__)
 static const uptr kAllocatorSpace = 0;
 static const uptr kAllocatorSize = SANITIZER_MMAP_RANGE_SIZE;
 static const uptr kAllocatorRegionSizeLog = 20;
@@ -66,7 +66,8 @@
     CompactSizeClassMap, kAllocatorRegionSizeLog, ByteMap,
     MapUnmapCallback> PrimaryAllocator;
 #else
-typedef SizeClassAllocator64<kHeapMemBeg, kHeapMemEnd - kHeapMemBeg, 0,
+typedef SizeClassAllocator64<Mapping::kHeapMemBeg,
+    Mapping::kHeapMemEnd - Mapping::kHeapMemBeg, 0,
     DefaultSizeClassMap, MapUnmapCallback> PrimaryAllocator;
 #endif
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
@@ -410,12 +411,18 @@
 };
 
 #ifndef SANITIZER_GO
+#if SANITIZER_MAC
+ThreadState *cur_thread();
+void cur_thread_finalize();
+#else
 __attribute__((tls_model("initial-exec")))
 extern THREADLOCAL char cur_thread_placeholder[];
 INLINE ThreadState *cur_thread() {
   return reinterpret_cast<ThreadState *>(&cur_thread_placeholder);
 }
-#endif
+INLINE void cur_thread_finalize() { }
+#endif  // SANITIZER_MAC
+#endif  // SANITIZER_GO
 
 class ThreadContext : public ThreadContextBase {
  public:
@@ -458,7 +465,7 @@
 
 struct FiredSuppression {
   ReportType type;
-  uptr pc;
+  uptr pc_or_addr;
   Suppression *supp;
 };
 
@@ -480,9 +487,11 @@
 
   ThreadRegistry *thread_registry;
 
+  Mutex racy_mtx;
   Vector<RacyStacks> racy_stacks;
   Vector<RacyAddress> racy_addresses;
   // The number of fired suppressions may be large.
+  Mutex fired_suppressions_mtx;
   InternalMmapVector<FiredSuppression> fired_suppressions;
   DDetector *dd;
 
@@ -587,8 +596,7 @@
 
 void ReportRace(ThreadState *thr);
 bool OutputReport(ThreadState *thr, const ScopedReport &srep);
-bool IsFiredSuppression(Context *ctx, const ScopedReport &srep,
-                        StackTrace trace);
+bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
 bool IsExpectedReport(uptr addr, uptr size);
 void PrintMatchedBenignRaces();
 
@@ -708,7 +716,7 @@
 // The trick is that the call preserves all registers and the compiler
 // does not treat it as a call.
 // If it does not work for you, use normal call.
-#if !SANITIZER_DEBUG && defined(__x86_64__)
+#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_MAC
 // The caller may not create the stack frame for itself at all,
 // so we create a reserve stack frame for it (1024b must be enough).
 #define HACKY_CALL(f) \
@@ -754,11 +762,7 @@
 
 #ifndef SANITIZER_GO
 uptr ALWAYS_INLINE HeapEnd() {
-#if SANITIZER_CAN_USE_ALLOCATOR64
-  return kHeapMemEnd + PrimaryAllocator::AdditionalSize();
-#else
-  return kHeapMemEnd;
-#endif
+  return HeapMemEnd() + PrimaryAllocator::AdditionalSize();
 }
 #endif
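
On every platform except OS X, cur_thread stays the cheap path shown above: a zero-initialized thread-local byte array reinterpreted as the ThreadState, so no constructor runs at thread start. A stripped-down sketch of that placeholder idiom; ThreadStateLike is a stand-in, and the real declaration adds tls_model("initial-exec") and 64-byte alignment.

    #include <cstdio>

    struct ThreadStateLike {
      int tid;
      // ... in the real ThreadState, a large block of POD fields ...
    };

    // Zero-initialized TLS buffer; no constructor runs at thread creation.
    alignas(ThreadStateLike)
    static thread_local char placeholder[sizeof(ThreadStateLike)];

    static ThreadStateLike *CurThread() {
      return reinterpret_cast<ThreadStateLike *>(placeholder);
    }

    int main() {
      printf("tid=%d\n", CurThread()->tid);  // 0: TLS is zero-initialized
      return 0;
    }
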
 
diff --git a/lib/tsan/rtl/tsan_rtl_aarch64.S b/lib/tsan/rtl/tsan_rtl_aarch64.S
new file mode 100644
index 0000000..9cea3cf
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_aarch64.S
@@ -0,0 +1,207 @@
+#include "sanitizer_common/sanitizer_asm.h"
+.section .text
+
+.hidden __tsan_setjmp
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl setjmp
+.type setjmp, @function
+setjmp:
+  CFI_STARTPROC
+
+  // save env parameters for function call
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save jmp_buf
+  str     x19, [sp, 16]
+  CFI_OFFSET (19, -16)
+  mov     x19, x0
+
+  // SP pointer mangling (see glibc setjmp)
+  adrp    x2, :got:__pointer_chk_guard
+  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  add     x0, x29, 32
+  ldr     x2, [x2]
+  eor     x1, x2, x0
+
+  // call tsan interceptor
+  bl      __tsan_setjmp
+
+  // restore env parameter
+  mov     x0, x19
+  ldr     x19, [sp, 16]
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (30)
+  CFI_RESTORE (19)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc setjmp
+  adrp    x1, :got:_ZN14__interception11real_setjmpE
+  ldr     x1, [x1, #:got_lo12:_ZN14__interception11real_setjmpE]
+  ldr     x1, [x1]
+  br      x1
+
+  CFI_ENDPROC
+.size setjmp, .-setjmp
+
+.comm _ZN14__interception12real__setjmpE,8,8
+.globl _setjmp
+.type _setjmp, @function
+_setjmp:
+  CFI_STARTPROC
+
+  // save env parameters for function call
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save jmp_buf
+  str     x19, [sp, 16]
+  CFI_OFFSET (19, -16)
+  mov     x19, x0
+
+  // SP pointer mangling (see glibc setjmp)
+  adrp    x2, :got:__pointer_chk_guard
+  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  add     x0, x29, 32
+  ldr     x2, [x2]
+  eor     x1, x2, x0
+
+  // call tsan interceptor
+  bl      __tsan_setjmp
+
+  // Restore jmp_buf parameter
+  mov     x0, x19
+  ldr     x19, [sp, 16]
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (30)
+  CFI_RESTORE (19)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc setjmp
+  adrp    x1, :got:_ZN14__interception12real__setjmpE
+  ldr     x1, [x1, #:got_lo12:_ZN14__interception12real__setjmpE]
+  ldr     x1, [x1]
+  br      x1
+
+  CFI_ENDPROC
+.size _setjmp, .-_setjmp
+
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.globl sigsetjmp
+.type sigsetjmp, @function
+sigsetjmp:
+  CFI_STARTPROC
+
+  // save env parameters for function call
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save jmp_buf and savesigs
+  stp     x19, x20, [sp, 16]
+  CFI_OFFSET (19, -16)
+  CFI_OFFSET (20, -8)
+  mov     w20, w1
+  mov     x19, x0
+
+  // SP pointer mangling (see glibc setjmp)
+  adrp    x2, :got:__pointer_chk_guard
+  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  add     x0, x29, 32
+  ldr     x2, [x2]
+  eor     x1, x2, x0
+
+  // call tsan interceptor
+  bl      __tsan_setjmp
+
+  // restore env parameter
+  mov     w1, w20
+  mov     x0, x19
+  ldp     x19, x20, [sp, 16]
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (30)
+  CFI_RESTORE (29)
+  CFI_RESTORE (19)
+  CFI_RESTORE (20)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc sigsetjmp
+  adrp    x2, :got:_ZN14__interception14real_sigsetjmpE
+  ldr     x2, [x2, #:got_lo12:_ZN14__interception14real_sigsetjmpE]
+  ldr     x2, [x2]
+  br      x2
+  CFI_ENDPROC
+.size sigsetjmp, .-sigsetjmp
+
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl __sigsetjmp
+.type __sigsetjmp, @function
+__sigsetjmp:
+  CFI_STARTPROC
+
+  // save env parameters for function call
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save jmp_buf and savesigs
+  stp     x19, x20, [sp, 16]
+  CFI_OFFSET (19, -16)
+  CFI_OFFSET (20, -8)
+  mov     w20, w1
+  mov     x19, x0
+
+  // SP pointer mangling (see glibc setjmp)
+  adrp    x2, :got:__pointer_chk_guard
+  ldr     x2, [x2, #:got_lo12:__pointer_chk_guard]
+  add     x0, x29, 32
+  ldr     x2, [x2]
+  eor     x1, x2, x0
+
+  // call tsan interceptor
+  bl      __tsan_setjmp
+
+  mov     w1, w20
+  mov     x0, x19
+  ldp     x19, x20, [sp, 16]
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (30)
+  CFI_RESTORE (29)
+  CFI_RESTORE (19)
+  CFI_RESTORE (20)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc __sigsetjmp
+  adrp    x2, :got:_ZN14__interception16real___sigsetjmpE
+  ldr     x2, [x2, #:got_lo12:_ZN14__interception16real___sigsetjmpE]
+  ldr     x2, [x2]
+  br      x2
+  CFI_ENDPROC
+.size __sigsetjmp, .-__sigsetjmp
+
+#if defined(__linux__)
+/* We do not need executable stack.  */
+.section        .note.GNU-stack,"",@progbits
+#endif
diff --git a/lib/tsan/rtl/tsan_rtl_amd64.S b/lib/tsan/rtl/tsan_rtl_amd64.S
index 8db62f9..caa8323 100644
--- a/lib/tsan/rtl/tsan_rtl_amd64.S
+++ b/lib/tsan/rtl/tsan_rtl_amd64.S
@@ -1,9 +1,13 @@
 #include "sanitizer_common/sanitizer_asm.h"
+#if !defined(__APPLE__)
 .section .text
+#else
+.section __TEXT,__text
+#endif
 
-.hidden __tsan_trace_switch
-.globl __tsan_trace_switch_thunk
-__tsan_trace_switch_thunk:
+ASM_HIDDEN(__tsan_trace_switch)
+.globl ASM_TSAN_SYMBOL(__tsan_trace_switch_thunk)
+ASM_TSAN_SYMBOL(__tsan_trace_switch_thunk):
   CFI_STARTPROC
   # Save scratch registers.
   push %rax
@@ -42,7 +46,7 @@
   shr $4, %rsp  # clear 4 lsb, align to 16
   shl $4, %rsp
 
-  call __tsan_trace_switch
+  call ASM_TSAN_SYMBOL(__tsan_trace_switch)
 
   # Unalign stack frame back.
   mov %rbx, %rsp  # restore the original rsp
@@ -81,9 +85,9 @@
   ret
   CFI_ENDPROC
 
-.hidden __tsan_report_race
-.globl __tsan_report_race_thunk
-__tsan_report_race_thunk:
+ASM_HIDDEN(__tsan_report_race)
+.globl ASM_TSAN_SYMBOL(__tsan_report_race_thunk)
+ASM_TSAN_SYMBOL(__tsan_report_race_thunk):
   CFI_STARTPROC
   # Save scratch registers.
   push %rax
@@ -122,7 +126,7 @@
   shr $4, %rsp  # clear 4 lsb, align to 16
   shl $4, %rsp
 
-  call __tsan_report_race
+  call ASM_TSAN_SYMBOL(__tsan_report_race)
 
   # Unalign stack frame back.
   mov %rbx, %rsp  # restore the original rsp
@@ -161,11 +165,13 @@
   ret
   CFI_ENDPROC
 
-.hidden __tsan_setjmp
+ASM_HIDDEN(__tsan_setjmp)
+#if !defined(__APPLE__)
 .comm _ZN14__interception11real_setjmpE,8,8
-.globl setjmp
-.type setjmp, @function
-setjmp:
+#endif
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp):
   CFI_STARTPROC
   // save env parameter
   push %rdi
@@ -175,29 +181,38 @@
 #if defined(__FreeBSD__)
   lea 8(%rsp), %rdi
   mov %rdi, %rsi
-#else
+#elif defined(__APPLE__)
+  lea 16(%rsp), %rdi
+  mov %rdi, %rsi
+#elif defined(__linux__)
   lea 16(%rsp), %rdi
   mov %rdi, %rsi
   xor %fs:0x30, %rsi  // magic mangling of rsp (see libc setjmp)
   rol $0x11, %rsi
+#else
+# error "Unknown platform"
 #endif
   // call tsan interceptor
-  call __tsan_setjmp
+  call ASM_TSAN_SYMBOL(__tsan_setjmp)
   // restore env parameter
   pop %rdi
   CFI_ADJUST_CFA_OFFSET(-8)
   CFI_RESTORE(%rdi)
   // tail jump to libc setjmp
   movl $0, %eax
+#if !defined(__APPLE__)
   movq _ZN14__interception11real_setjmpE@GOTPCREL(%rip), %rdx
   jmp *(%rdx)
+#else
+  jmp ASM_TSAN_SYMBOL(setjmp)
+#endif
   CFI_ENDPROC
-.size setjmp, .-setjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(setjmp))
 
 .comm _ZN14__interception12real__setjmpE,8,8
-.globl _setjmp
-.type _setjmp, @function
-_setjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp):
   CFI_STARTPROC
   // save env parameter
   push %rdi
@@ -207,29 +222,38 @@
 #if defined(__FreeBSD__)
   lea 8(%rsp), %rdi
   mov %rdi, %rsi
-#else
+#elif defined(__APPLE__)
+  lea 16(%rsp), %rdi
+  mov %rdi, %rsi
+#elif defined(__linux__)
   lea 16(%rsp), %rdi
   mov %rdi, %rsi
   xor %fs:0x30, %rsi  // magic mangling of rsp (see libc setjmp)
   rol $0x11, %rsi
+#else
+# error "Unknown platform"
 #endif
   // call tsan interceptor
-  call __tsan_setjmp
+  call ASM_TSAN_SYMBOL(__tsan_setjmp)
   // restore env parameter
   pop %rdi
   CFI_ADJUST_CFA_OFFSET(-8)
   CFI_RESTORE(%rdi)
   // tail jump to libc setjmp
   movl $0, %eax
+#if !defined(__APPLE__)
   movq _ZN14__interception12real__setjmpE@GOTPCREL(%rip), %rdx
   jmp *(%rdx)
+#else
+  jmp ASM_TSAN_SYMBOL(_setjmp)
+#endif
   CFI_ENDPROC
-.size _setjmp, .-_setjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(_setjmp))
 
 .comm _ZN14__interception14real_sigsetjmpE,8,8
-.globl sigsetjmp
-.type sigsetjmp, @function
-sigsetjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp):
   CFI_STARTPROC
   // save env parameter
   push %rdi
@@ -246,14 +270,19 @@
 #if defined(__FreeBSD__)
   lea 24(%rsp), %rdi
   mov %rdi, %rsi
-#else
+#elif defined(__APPLE__)
+  lea 32(%rsp), %rdi
+  mov %rdi, %rsi
+#elif defined(__linux__)
   lea 32(%rsp), %rdi
   mov %rdi, %rsi
   xor %fs:0x30, %rsi  // magic mangling of rsp (see libc setjmp)
   rol $0x11, %rsi
+#else
+# error "Unknown platform"
 #endif
   // call tsan interceptor
-  call __tsan_setjmp
+  call ASM_TSAN_SYMBOL(__tsan_setjmp)
   // unalign stack frame
   add $8, %rsp
   CFI_ADJUST_CFA_OFFSET(-8)
@@ -267,15 +296,20 @@
   CFI_RESTORE(%rdi)
   // tail jump to libc sigsetjmp
   movl $0, %eax
+#if !defined(__APPLE__)
   movq _ZN14__interception14real_sigsetjmpE@GOTPCREL(%rip), %rdx
   jmp *(%rdx)
+#else
+  jmp ASM_TSAN_SYMBOL(sigsetjmp)
+#endif
   CFI_ENDPROC
-.size sigsetjmp, .-sigsetjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(sigsetjmp))
 
+#if !defined(__APPLE__)
 .comm _ZN14__interception16real___sigsetjmpE,8,8
-.globl __sigsetjmp
-.type __sigsetjmp, @function
-__sigsetjmp:
+.globl ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp):
   CFI_STARTPROC
   // save env parameter
   push %rdi
@@ -299,7 +333,7 @@
   rol $0x11, %rsi
 #endif
   // call tsan interceptor
-  call __tsan_setjmp
+  call ASM_TSAN_SYMBOL(__tsan_setjmp)
   // unalign stack frame
   add $8, %rsp
   CFI_ADJUST_CFA_OFFSET(-8)
@@ -316,7 +350,8 @@
   movq _ZN14__interception16real___sigsetjmpE@GOTPCREL(%rip), %rdx
   jmp *(%rdx)
   CFI_ENDPROC
-.size __sigsetjmp, .-__sigsetjmp
+ASM_SIZE(ASM_TSAN_SYMBOL_INTERCEPTOR(__sigsetjmp))
+#endif  // !defined(__APPLE__)
 
 #if defined(__FreeBSD__) || defined(__linux__)
 /* We do not need executable stack.  */
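
The Linux branches above pass __tsan_setjmp a second argument: the stack pointer mangled exactly the way glibc will store it in the jmp_buf, that is, XORed with the per-process guard kept at %fs:0x30 and rotated left by 0x11 (the FreeBSD and Apple branches pass it unmangled). A small demonstration of that mangle/demangle round trip; the guard constant below is a stand-in, since the real value is private to libc.

    #include <cstdint>
    #include <cstdio>

    static uint64_t rol64(uint64_t v, unsigned n) {  // n in (0, 64)
      return (v << n) | (v >> (64 - n));
    }
    static uint64_t mangle(uint64_t sp, uint64_t guard) {
      return rol64(sp ^ guard, 0x11);      // XOR with guard, rotate left 17
    }
    static uint64_t demangle(uint64_t m, uint64_t guard) {
      return rol64(m, 64 - 0x11) ^ guard;  // rotate right 17, XOR with guard
    }

    int main() {
      const uint64_t guard = 0xdeadbeefcafef00dULL;  // stand-in for %fs:0x30
      const uint64_t sp = 0x00007ffdc0ffee00ULL;
      uint64_t m = mangle(sp, guard);
      printf("mangled=%016llx roundtrip_ok=%d\n",
             (unsigned long long)m, demangle(m, guard) == sp);
      return 0;
    }
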
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index 09180d8..62ab7aa 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -472,7 +472,7 @@
   for (int i = 0; i < r->n; i++) {
     for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) {
       u32 stk = r->loop[i].stk[j];
-      if (stk) {
+      if (stk && stk != 0xffffffff) {
         rep.AddStack(StackDepotGet(stk), true);
       } else {
         // Sometimes we fail to extract the stack trace (FIXME: investigate),
diff --git a/lib/tsan/rtl/tsan_rtl_ppc64.S b/lib/tsan/rtl/tsan_rtl_ppc64.S
new file mode 100644
index 0000000..8285e21
--- /dev/null
+++ b/lib/tsan/rtl/tsan_rtl_ppc64.S
@@ -0,0 +1,288 @@
+#include "tsan_ppc_regs.h"
+
+        .section .text
+        .hidden __tsan_setjmp
+        .globl _setjmp
+        .type _setjmp, @function
+        .align 4
+#if _CALL_ELF == 2
+_setjmp:
+#else
+	.section ".opd","aw"
+	.align 3
+_setjmp:
+	.quad   .L._setjmp,.TOC.@tocbase,0
+	.previous
+#endif
+.L._setjmp:
+        mflr    r0
+        stdu    r1,-48(r1)
+        std     r2,24(r1)
+        std     r3,32(r1)
+        std     r0,40(r1)
+        // r3 is the original stack pointer.
+        addi    r3,r1,48
+        // r4 is the mangled stack pointer (see glibc)
+        ld      r4,-28696(r13)
+        xor     r4,r3,r4
+        // Materialize a TOC in case we were called from libc.
+        // For big-endian, we load the TOC from the OPD.  For little-
+        // endian, we use the .TOC. symbol to find it.
+        nop
+        bcl     20,31,0f
+0:
+        mflr    r2
+#if _CALL_ELF == 2
+        addis   r2,r2,.TOC.-0b@ha
+        addi    r2,r2,.TOC.-0b@l
+#else
+        addis   r2,r2,_setjmp-0b@ha
+        addi    r2,r2,_setjmp-0b@l
+        ld      r2,8(r2)
+#endif
+        // Call the interceptor.
+        bl      __tsan_setjmp
+        nop
+        // Restore regs needed for setjmp.
+        ld      r3,32(r1)
+        ld      r0,40(r1)
+        // Emulate the real setjmp function.  We do this because we can't
+        // perform a sibcall:  The real setjmp function trashes the TOC
+        // pointer, and with a sibcall we have no way to restore it.
+        // This way we can make sure our caller's stack pointer and
+        // link register are saved correctly in the jmpbuf.
+        ld      r6,-28696(r13)
+        addi    r5,r1,48  // original stack ptr of caller
+        xor     r5,r6,r5
+        std     r5,0(r3)  // mangled stack ptr of caller
+        ld      r5,24(r1)
+        std     r5,8(r3)  // caller's saved TOC pointer
+        xor     r0,r6,r0
+        std     r0,16(r3) // caller's mangled return address
+        mfcr    r0
+        // Nonvolatiles.
+        std     r14,24(r3)
+        stfd    f14,176(r3)
+        stw     r0,172(r3) // CR
+        std     r15,32(r3)
+        stfd    f15,184(r3)
+        std     r16,40(r3)
+        stfd    f16,192(r3)
+        std     r17,48(r3)
+        stfd    f17,200(r3)
+        std     r18,56(r3)
+        stfd    f18,208(r3)
+        std     r19,64(r3)
+        stfd    f19,216(r3)
+        std     r20,72(r3)
+        stfd    f20,224(r3)
+        std     r21,80(r3)
+        stfd    f21,232(r3)
+        std     r22,88(r3)
+        stfd    f22,240(r3)
+        std     r23,96(r3)
+        stfd    f23,248(r3)
+        std     r24,104(r3)
+        stfd    f24,256(r3)
+        std     r25,112(r3)
+        stfd    f25,264(r3)
+        std     r26,120(r3)
+        stfd    f26,272(r3)
+        std     r27,128(r3)
+        stfd    f27,280(r3)
+        std     r28,136(r3)
+        stfd    f28,288(r3)
+        std     r29,144(r3)
+        stfd    f29,296(r3)
+        std     r30,152(r3)
+        stfd    f30,304(r3)
+        std     r31,160(r3)
+        stfd    f31,312(r3)
+        addi    r5,r3,320
+        mfspr   r0,256
+        stw     r0,168(r3)  // VRSAVE
+        addi    r6,r5,16
+        stvx    v20,0,r5
+        addi    r5,r5,32
+        stvx    v21,0,r6
+        addi    r6,r6,32
+        stvx    v22,0,r5
+        addi    r5,r5,32
+        stvx    v23,0,r6
+        addi    r6,r6,32
+        stvx    v24,0,r5
+        addi    r5,r5,32
+        stvx    v25,0,r6
+        addi    r6,r6,32
+        stvx    v26,0,r5
+        addi    r5,r5,32
+        stvx    v27,0,r6
+        addi    r6,r6,32
+        stvx    v28,0,r5
+        addi    r5,r5,32
+        stvx    v29,0,r6
+        addi    r6,r6,32
+        stvx    v30,0,r5
+        stvx    v31,0,r6
+        // Clear the "mask-saved" slot.
+        li      r4,0
+        stw     r4,512(r3)
+        // Restore TOC, LR, and stack and return to caller.
+        ld      r2,24(r1)
+        ld      r0,40(r1)
+        addi    r1,r1,48
+        li      r3,0  // This is the setjmp return path
+        mtlr    r0
+        blr
+        .size _setjmp, .-.L._setjmp
+
+        .globl setjmp
+        .type setjmp, @function
+        .align 4
+setjmp:
+        b       _setjmp
+        .size setjmp, .-setjmp
+
+        // sigsetjmp is like setjmp, except that the mask in r4 needs
+        // to be saved at offset 512 of the jump buffer.
+        .globl __sigsetjmp
+        .type __sigsetjmp, @function
+        .align 4
+#if _CALL_ELF == 2
+__sigsetjmp:
+#else
+	.section ".opd","aw"
+	.align 3
+__sigsetjmp:
+	.quad   .L.__sigsetjmp,.TOC.@tocbase,0
+	.previous
+#endif
+.L.__sigsetjmp:
+        mflr    r0
+        stdu    r1,-64(r1)
+        std     r2,24(r1)
+        std     r3,32(r1)
+        std     r4,40(r1)
+        std     r0,48(r1)
+        // r3 is the original stack pointer.
+        addi    r3,r1,64
+        // r4 is the mangled stack pointer (see glibc)
+        ld      r4,-28696(r13)
+        xor     r4,r3,r4
+        // Materialize a TOC in case we were called from libc.
+        // For big-endian, we load the TOC from the OPD.  For little-
+        // endian, we use the .TOC. symbol to find it.
+        nop
+        bcl     20,31,1f
+1:
+        mflr    r2
+#if _CALL_ELF == 2
+        addis   r2,r2,.TOC.-1b@ha
+        addi    r2,r2,.TOC.-1b@l
+#else
+        addis   r2,r2,_setjmp-1b@ha
+        addi    r2,r2,_setjmp-1b@l
+        ld      r2,8(r2)
+#endif
+        // Call the interceptor.
+        bl      __tsan_setjmp
+        nop
+        // Restore regs needed for __sigsetjmp.
+        ld      r3,32(r1)
+        ld      r4,40(r1)
+        ld      r0,48(r1)
+        // Emulate the real sigsetjmp function.  We do this because we can't
+        // perform a sibcall:  The real sigsetjmp function trashes the TOC
+        // pointer, and with a sibcall we have no way to restore it.
+        // This way we can make sure our caller's stack pointer and
+        // link register are saved correctly in the jmpbuf.
+        ld      r6,-28696(r13)
+        addi    r5,r1,64  // original stack ptr of caller
+        xor     r5,r6,r5
+        std     r5,0(r3)  // mangled stack ptr of caller
+        ld      r5,24(r1)
+        std     r5,8(r3)  // caller's saved TOC pointer
+        xor     r0,r6,r0
+        std     r0,16(r3) // caller's mangled return address
+        mfcr    r0
+        // Nonvolatiles.
+        std     r14,24(r3)
+        stfd    f14,176(r3)
+        stw     r0,172(r3) // CR
+        std     r15,32(r3)
+        stfd    f15,184(r3)
+        std     r16,40(r3)
+        stfd    f16,192(r3)
+        std     r17,48(r3)
+        stfd    f17,200(r3)
+        std     r18,56(r3)
+        stfd    f18,208(r3)
+        std     r19,64(r3)
+        stfd    f19,216(r3)
+        std     r20,72(r3)
+        stfd    f20,224(r3)
+        std     r21,80(r3)
+        stfd    f21,232(r3)
+        std     r22,88(r3)
+        stfd    f22,240(r3)
+        std     r23,96(r3)
+        stfd    f23,248(r3)
+        std     r24,104(r3)
+        stfd    f24,256(r3)
+        std     r25,112(r3)
+        stfd    f25,264(r3)
+        std     r26,120(r3)
+        stfd    f26,272(r3)
+        std     r27,128(r3)
+        stfd    f27,280(r3)
+        std     r28,136(r3)
+        stfd    f28,288(r3)
+        std     r29,144(r3)
+        stfd    f29,296(r3)
+        std     r30,152(r3)
+        stfd    f30,304(r3)
+        std     r31,160(r3)
+        stfd    f31,312(r3)
+        addi    r5,r3,320
+        mfspr   r0,256
+        stw     r0,168(r3) // VRSAVE
+        addi    r6,r5,16
+        stvx    v20,0,r5
+        addi    r5,r5,32
+        stvx    v21,0,r6
+        addi    r6,r6,32
+        stvx    v22,0,r5
+        addi    r5,r5,32
+        stvx    v23,0,r6
+        addi    r6,r6,32
+        stvx    v24,0,r5
+        addi    r5,r5,32
+        stvx    v25,0,r6
+        addi    r6,r6,32
+        stvx    v26,0,r5
+        addi    r5,r5,32
+        stvx    v27,0,r6
+        addi    r6,r6,32
+        stvx    v28,0,r5
+        addi    r5,r5,32
+        stvx    v29,0,r6
+        addi    r6,r6,32
+        stvx    v30,0,r5
+        stvx    v31,0,r6
+        // Save into the "mask-saved" slot.
+        stw     r4,512(r3)
+        // Restore TOC, LR, and stack and return to caller.
+        ld      r2,24(r1)
+        ld      r0,48(r1)
+        addi    r1,r1,64
+        li      r3,0  // This is the sigsetjmp return path
+        mtlr    r0
+        blr
+        .size __sigsetjmp, .-.L.__sigsetjmp
+
+        .globl sigsetjmp
+        .type sigsetjmp, @function
+        .align 4
+sigsetjmp:
+        b       __sigsetjmp
+        .size sigsetjmp, .-sigsetjmp
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index dc9438e..5aff6ca 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -49,8 +49,8 @@
 #ifdef TSAN_EXTERNAL_HOOKS
 bool OnReport(const ReportDesc *rep, bool suppressed);
 #else
-SANITIZER_INTERFACE_ATTRIBUTE
-bool WEAK OnReport(const ReportDesc *rep, bool suppressed) {
+SANITIZER_WEAK_CXX_DEFAULT_IMPL
+bool OnReport(const ReportDesc *rep, bool suppressed) {
   (void)rep;
   return suppressed;
 }
@@ -186,7 +186,7 @@
       return;
   }
   void *mem = internal_alloc(MBlockReportThread, sizeof(ReportThread));
-  ReportThread *rt = new(mem) ReportThread();
+  ReportThread *rt = new(mem) ReportThread;
   rep_->threads.PushBack(rt);
   rt->id = tctx->tid;
   rt->pid = tctx->os_id;
@@ -200,16 +200,16 @@
 }
 
 #ifndef SANITIZER_GO
+static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) {
+  int unique_id = *(int *)arg;
+  return tctx->unique_id == (u32)unique_id;
+}
+
 static ThreadContext *FindThreadByUidLocked(int unique_id) {
   ctx->thread_registry->CheckLocked();
-  for (unsigned i = 0; i < kMaxTid; i++) {
-    ThreadContext *tctx = static_cast<ThreadContext*>(
-        ctx->thread_registry->GetThreadLocked(i));
-    if (tctx && tctx->unique_id == (u32)unique_id) {
-      return tctx;
-    }
-  }
-  return 0;
+  return static_cast<ThreadContext *>(
+      ctx->thread_registry->FindThreadContextLocked(
+          FindThreadByUidLockedCallback, &unique_id));
 }
 
 static ThreadContext *FindThreadByTidLocked(int tid) {
@@ -256,7 +256,7 @@
       return;
   }
   void *mem = internal_alloc(MBlockReportMutex, sizeof(ReportMutex));
-  ReportMutex *rm = new(mem) ReportMutex();
+  ReportMutex *rm = new(mem) ReportMutex;
   rep_->mutexes.PushBack(rm);
   rm->id = s->uid;
   rm->addr = s->addr;
@@ -289,7 +289,7 @@
       return;
   }
   void *mem = internal_alloc(MBlockReportMutex, sizeof(ReportMutex));
-  ReportMutex *rm = new(mem) ReportMutex();
+  ReportMutex *rm = new(mem) ReportMutex;
   rep_->mutexes.PushBack(rm);
   rm->id = id;
   rm->addr = 0;
@@ -369,27 +369,20 @@
   // This function restores stack trace and mutex set for the thread/epoch.
   // It does so by getting stack trace and mutex set at the beginning of
   // trace part, and then replaying the trace till the given epoch.
-  ctx->thread_registry->CheckLocked();
-  ThreadContext *tctx = static_cast<ThreadContext*>(
-      ctx->thread_registry->GetThreadLocked(tid));
-  if (tctx == 0)
-    return;
-  if (tctx->status != ThreadStatusRunning
-      && tctx->status != ThreadStatusFinished
-      && tctx->status != ThreadStatusDead)
-    return;
-  Trace* trace = ThreadTrace(tctx->tid);
-  Lock l(&trace->mtx);
+  Trace* trace = ThreadTrace(tid);
+  ReadLock l(&trace->mtx);
   const int partidx = (epoch / kTracePartSize) % TraceParts();
   TraceHeader* hdr = &trace->headers[partidx];
-  if (epoch < hdr->epoch0)
+  if (epoch < hdr->epoch0 || epoch >= hdr->epoch0 + kTracePartSize)
     return;
+  CHECK_EQ(RoundDown(epoch, kTracePartSize), hdr->epoch0);
   const u64 epoch0 = RoundDown(epoch, TraceSize());
   const u64 eend = epoch % TraceSize();
   const u64 ebegin = RoundDown(eend, kTracePartSize);
   DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n",
           tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx);
-  InternalScopedBuffer<uptr> stack(kShadowStackSize);
+  Vector<uptr> stack(MBlockReportStack);
+  stack.Resize(hdr->stack0.size + 64);
   for (uptr i = 0; i < hdr->stack0.size; i++) {
     stack[i] = hdr->stack0.trace[i];
     DPrintf2("  #%02zu: pc=%zx\n", i, stack[i]);
@@ -406,6 +399,8 @@
     if (typ == EventTypeMop) {
       stack[pos] = pc;
     } else if (typ == EventTypeFuncEnter) {
+      if (stack.Size() < pos + 2)
+        stack.Resize(pos + 2);
       stack[pos++] = pc;
     } else if (typ == EventTypeFuncExit) {
       if (pos > 0)
@@ -428,50 +423,58 @@
   if (pos == 0 && stack[0] == 0)
     return;
   pos++;
-  stk->Init(stack.data(), pos);
+  stk->Init(&stack[0], pos);
 }
 
 static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
                              uptr addr_min, uptr addr_max) {
   bool equal_stack = false;
   RacyStacks hash;
-  if (flags()->suppress_equal_stacks) {
-    hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
-    hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
-    for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
-      if (hash == ctx->racy_stacks[i]) {
-        DPrintf("ThreadSanitizer: suppressing report as doubled (stack)\n");
-        equal_stack = true;
-        break;
-      }
-    }
-  }
   bool equal_address = false;
   RacyAddress ra0 = {addr_min, addr_max};
-  if (flags()->suppress_equal_addresses) {
-    for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
-      RacyAddress ra2 = ctx->racy_addresses[i];
-      uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
-      uptr minend = min(ra0.addr_max, ra2.addr_max);
-      if (maxbeg < minend) {
-        DPrintf("ThreadSanitizer: suppressing report as doubled (addr)\n");
-        equal_address = true;
-        break;
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (flags()->suppress_equal_stacks) {
+      hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
+      hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+      for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
+        if (hash == ctx->racy_stacks[i]) {
+          VPrintf(2,
+              "ThreadSanitizer: suppressing report as doubled (stack)\n");
+          equal_stack = true;
+          break;
+        }
+      }
+    }
+    if (flags()->suppress_equal_addresses) {
+      for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
+        RacyAddress ra2 = ctx->racy_addresses[i];
+        uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
+        uptr minend = min(ra0.addr_max, ra2.addr_max);
+        if (maxbeg < minend) {
+          VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
+          equal_address = true;
+          break;
+        }
       }
     }
   }
-  if (equal_stack || equal_address) {
-    if (!equal_stack)
-      ctx->racy_stacks.PushBack(hash);
-    if (!equal_address)
-      ctx->racy_addresses.PushBack(ra0);
-    return true;
+  if (!equal_stack && !equal_address)
+    return false;
+  if (!equal_stack) {
+    Lock lock(&ctx->racy_mtx);
+    ctx->racy_stacks.PushBack(hash);
   }
-  return false;
+  if (!equal_address) {
+    Lock lock(&ctx->racy_mtx);
+    ctx->racy_addresses.PushBack(ra0);
+  }
+  return true;
 }
 
 static void AddRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2],
                           uptr addr_min, uptr addr_max) {
+  Lock lock(&ctx->racy_mtx);
   if (flags()->suppress_equal_stacks) {
     RacyStacks hash;
     hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
@@ -485,26 +488,29 @@
 }
 
 bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
-  atomic_store(&ctx->last_symbolize_time_ns, NanoTime(), memory_order_relaxed);
+  if (!flags()->report_bugs)
+    return false;
+  atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
   const ReportDesc *rep = srep.GetReport();
   Suppression *supp = 0;
-  uptr suppress_pc = 0;
-  for (uptr i = 0; suppress_pc == 0 && i < rep->mops.Size(); i++)
-    suppress_pc = IsSuppressed(rep->typ, rep->mops[i]->stack, &supp);
-  for (uptr i = 0; suppress_pc == 0 && i < rep->stacks.Size(); i++)
-    suppress_pc = IsSuppressed(rep->typ, rep->stacks[i], &supp);
-  for (uptr i = 0; suppress_pc == 0 && i < rep->threads.Size(); i++)
-    suppress_pc = IsSuppressed(rep->typ, rep->threads[i]->stack, &supp);
-  for (uptr i = 0; suppress_pc == 0 && i < rep->locs.Size(); i++)
-    suppress_pc = IsSuppressed(rep->typ, rep->locs[i], &supp);
-  if (suppress_pc != 0) {
-    FiredSuppression s = {srep.GetReport()->typ, suppress_pc, supp};
+  uptr pc_or_addr = 0;
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->mops.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->mops[i]->stack, &supp);
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->stacks.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->stacks[i], &supp);
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->threads.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->threads[i]->stack, &supp);
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->locs.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->locs[i], &supp);
+  if (pc_or_addr != 0) {
+    Lock lock(&ctx->fired_suppressions_mtx);
+    FiredSuppression s = {srep.GetReport()->typ, pc_or_addr, supp};
     ctx->fired_suppressions.push_back(s);
   }
   {
     bool old_is_freeing = thr->is_freeing;
     thr->is_freeing = false;
-    bool suppressed = OnReport(rep, suppress_pc != 0);
+    bool suppressed = OnReport(rep, pc_or_addr != 0);
     thr->is_freeing = old_is_freeing;
     if (suppressed)
       return false;
@@ -512,20 +518,20 @@
   PrintReport(rep);
   ctx->nreported++;
   if (flags()->halt_on_error)
-    internal__exit(flags()->exitcode);
+    Die();
   return true;
 }
 
-bool IsFiredSuppression(Context *ctx, const ScopedReport &srep,
-                        StackTrace trace) {
+bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace) {
+  ReadLock lock(&ctx->fired_suppressions_mtx);
   for (uptr k = 0; k < ctx->fired_suppressions.size(); k++) {
-    if (ctx->fired_suppressions[k].type != srep.GetReport()->typ)
+    if (ctx->fired_suppressions[k].type != type)
       continue;
     for (uptr j = 0; j < trace.size; j++) {
       FiredSuppression *s = &ctx->fired_suppressions[k];
-      if (trace.trace[j] == s->pc) {
+      if (trace.trace[j] == s->pc_or_addr) {
         if (s->supp)
-          s->supp->hit_count++;
+          atomic_fetch_add(&s->supp->hit_count, 1, memory_order_relaxed);
         return true;
       }
     }
@@ -533,16 +539,15 @@
   return false;
 }
 
-static bool IsFiredSuppression(Context *ctx,
-                               const ScopedReport &srep,
-                               uptr addr) {
+static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
+  ReadLock lock(&ctx->fired_suppressions_mtx);
   for (uptr k = 0; k < ctx->fired_suppressions.size(); k++) {
-    if (ctx->fired_suppressions[k].type != srep.GetReport()->typ)
+    if (ctx->fired_suppressions[k].type != type)
       continue;
     FiredSuppression *s = &ctx->fired_suppressions[k];
-    if (addr == s->pc) {
+    if (addr == s->pc_or_addr) {
       if (s->supp)
-        s->supp->hit_count++;
+        atomic_fetch_add(&s->supp->hit_count, 1, memory_order_relaxed);
       return true;
     }
   }
@@ -595,8 +600,6 @@
       return;
   }
 
-  ThreadRegistryLock l0(ctx->thread_registry);
-
   ReportType typ = ReportTypeRace;
   if (thr->is_vptr_access && freed)
     typ = ReportTypeVptrUseAfterFree;
@@ -604,29 +607,35 @@
     typ = ReportTypeVptrRace;
   else if (freed)
     typ = ReportTypeUseAfterFree;
-  ScopedReport rep(typ);
-  if (IsFiredSuppression(ctx, rep, addr))
+
+  if (IsFiredSuppression(ctx, typ, addr))
     return;
+
   const uptr kMop = 2;
   VarSizeStackTrace traces[kMop];
   const uptr toppc = TraceTopPC(thr);
   ObtainCurrentStack(thr, toppc, &traces[0]);
-  if (IsFiredSuppression(ctx, rep, traces[0]))
+  if (IsFiredSuppression(ctx, typ, traces[0]))
     return;
-  InternalScopedBuffer<MutexSet> mset2(1);
-  new(mset2.data()) MutexSet();
+
+  // MutexSet is too large to live on the stack.
+  Vector<u64> mset_buffer(MBlockScopedBuf);
+  mset_buffer.Resize(sizeof(MutexSet) / sizeof(u64) + 1);
+  MutexSet *mset2 = new(&mset_buffer[0]) MutexSet();
+
   Shadow s2(thr->racy_state[1]);
-  RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2.data());
-  if (IsFiredSuppression(ctx, rep, traces[1]))
+  RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2);
+  if (IsFiredSuppression(ctx, typ, traces[1]))
     return;
 
   if (HandleRacyStacks(thr, traces, addr_min, addr_max))
     return;
 
+  ThreadRegistryLock l0(ctx->thread_registry);
+  ScopedReport rep(typ);
   for (uptr i = 0; i < kMop; i++) {
     Shadow s(thr->racy_state[i]);
-    rep.AddMemoryAccess(addr, s, traces[i],
-                        i == 0 ? &thr->mset : mset2.data());
+    rep.AddMemoryAccess(addr, s, traces[i], i == 0 ? &thr->mset : mset2);
   }
 
   for (uptr i = 0; i < kMop; i++) {
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index 66c78cf..dcae255 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -55,6 +55,8 @@
   if (tid == 0)
     return;
   OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg);
+  if (!args->thr)  // GCD workers don't have a parent thread.
+    return;
   args->thr->fast_state.IncrementEpoch();
   // Can't increment epoch w/o writing to the trace as well.
   TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
@@ -231,8 +233,10 @@
 int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
   StatInc(thr, StatThreadCreate);
   OnCreatedArgs args = { thr, pc };
-  int tid = ctx->thread_registry->CreateThread(uid, detached, thr->tid, &args);
-  DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", thr->tid, tid, uid);
+  u32 parent_tid = thr ? thr->tid : kInvalidTid;  // No parent for GCD workers.
+  int tid =
+      ctx->thread_registry->CreateThread(uid, detached, parent_tid, &args);
+  DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid);
   StatSet(thr, StatThreadMaxAlive, ctx->thread_registry->GetMaxAliveThreads());
   return tid;
 }
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
index 15fa43d..a5cca96 100644
--- a/lib/tsan/rtl/tsan_stat.cc
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -164,8 +164,9 @@
   name[StatMtxAtExit]                    = "  Atexit                          ";
   name[StatMtxAnnotations]               = "  Annotations                     ";
   name[StatMtxMBlock]                    = "  MBlock                          ";
-  name[StatMtxJavaMBlock]                = "  JavaMBlock                      ";
   name[StatMtxDeadlockDetector]          = "  DeadlockDetector                ";
+  name[StatMtxFired]                     = "  FiredSuppressions               ";
+  name[StatMtxRacy]                      = "  RacyStacks                      ";
   name[StatMtxFD]                        = "  FD                              ";
 
   Printf("Statistics:\n");
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
index 0bd949e..8ea3204 100644
--- a/lib/tsan/rtl/tsan_stat.h
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -169,8 +169,9 @@
   StatMtxAnnotations,
   StatMtxAtExit,
   StatMtxMBlock,
-  StatMtxJavaMBlock,
   StatMtxDeadlockDetector,
+  StatMtxFired,
+  StatMtxRacy,
   StatMtxFD,
 
   // This must be the last.
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index 7315729..8754b61 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -34,7 +34,8 @@
 "race:std::_Sp_counted_ptr_inplace<std::thread::_Impl\n";
 
 // Can be overridden in the frontend.
-extern "C" const char *WEAK __tsan_default_suppressions() {
+SANITIZER_WEAK_DEFAULT_IMPL
+const char *__tsan_default_suppressions() {
   return 0;
 }
 #endif
@@ -44,8 +45,9 @@
 ALIGNED(64) static char suppression_placeholder[sizeof(SuppressionContext)];
 static SuppressionContext *suppression_ctx = nullptr;
 static const char *kSuppressionTypes[] = {
-    kSuppressionRace,   kSuppressionMutex, kSuppressionThread,
-    kSuppressionSignal, kSuppressionLib,   kSuppressionDeadlock};
+    kSuppressionRace,   kSuppressionRaceTop, kSuppressionMutex,
+    kSuppressionThread, kSuppressionSignal, kSuppressionLib,
+    kSuppressionDeadlock};
 
 void InitializeSuppressions() {
   CHECK_EQ(nullptr, suppression_ctx);
@@ -94,6 +96,18 @@
   Die();
 }
 
+static uptr IsSuppressed(const char *stype, const AddressInfo &info,
+    Suppression **sp) {
+  if (suppression_ctx->Match(info.function, stype, sp) ||
+      suppression_ctx->Match(info.file, stype, sp) ||
+      suppression_ctx->Match(info.module, stype, sp)) {
+    VPrintf(2, "ThreadSanitizer: matched suppression '%s'\n", (*sp)->templ);
+    atomic_fetch_add(&(*sp)->hit_count, 1, memory_order_relaxed);
+    return info.address;
+  }
+  return 0;
+}
+
 uptr IsSuppressed(ReportType typ, const ReportStack *stack, Suppression **sp) {
   CHECK(suppression_ctx);
   if (!suppression_ctx->SuppressionCount() || stack == 0 ||
@@ -102,19 +116,14 @@
   const char *stype = conv(typ);
   if (0 == internal_strcmp(stype, kSuppressionNone))
     return 0;
-  Suppression *s;
   for (const SymbolizedStack *frame = stack->frames; frame;
-       frame = frame->next) {
-    const AddressInfo &info = frame->info;
-    if (suppression_ctx->Match(info.function, stype, &s) ||
-        suppression_ctx->Match(info.file, stype, &s) ||
-        suppression_ctx->Match(info.module, stype, &s)) {
-      DPrintf("ThreadSanitizer: matched suppression '%s'\n", s->templ);
-      s->hit_count++;
-      *sp = s;
-      return info.address;
-    }
+      frame = frame->next) {
+    uptr pc = IsSuppressed(stype, frame->info, sp);
+    if (pc != 0)
+      return pc;
   }
+  if (0 == internal_strcmp(stype, kSuppressionRace) && stack->frames != nullptr)
+    return IsSuppressed(kSuppressionRaceTop, stack->frames->info, sp);
   return 0;
 }
 
@@ -130,8 +139,8 @@
   const DataInfo &global = loc->global;
   if (suppression_ctx->Match(global.name, stype, &s) ||
       suppression_ctx->Match(global.module, stype, &s)) {
-      DPrintf("ThreadSanitizer: matched suppression '%s'\n", s->templ);
-      s->hit_count++;
+      VPrintf(2, "ThreadSanitizer: matched suppression '%s'\n", s->templ);
+      atomic_fetch_add(&s->hit_count, 1, memory_order_relaxed);
       *sp = s;
       return global.start;
   }
@@ -146,7 +155,7 @@
     return;
   int hit_count = 0;
   for (uptr i = 0; i < matched.size(); i++)
-    hit_count += matched[i]->hit_count;
+    hit_count += atomic_load_relaxed(&matched[i]->hit_count);
   Printf("ThreadSanitizer: Matched %d suppressions (pid=%d):\n", hit_count,
          (int)internal_getpid());
   for (uptr i = 0; i < matched.size(); i++) {
diff --git a/lib/tsan/rtl/tsan_suppressions.h b/lib/tsan/rtl/tsan_suppressions.h
index e6d279c..526952d 100644
--- a/lib/tsan/rtl/tsan_suppressions.h
+++ b/lib/tsan/rtl/tsan_suppressions.h
@@ -20,6 +20,7 @@
 
 const char kSuppressionNone[] = "none";
 const char kSuppressionRace[] = "race";
+const char kSuppressionRaceTop[] = "race_top";
 const char kSuppressionMutex[] = "mutex";
 const char kSuppressionThread[] = "thread";
 const char kSuppressionSignal[] = "signal";
diff --git a/lib/tsan/rtl/tsan_symbolize.cc b/lib/tsan/rtl/tsan_symbolize.cc
index 3beb44f..b242395 100644
--- a/lib/tsan/rtl/tsan_symbolize.cc
+++ b/lib/tsan/rtl/tsan_symbolize.cc
@@ -38,16 +38,10 @@
 
 // May be overridden by JIT/JAVA/etc.,
 // whatever produces PCs marked with kExternalPCBit.
-extern "C" bool __tsan_symbolize_external(uptr pc,
-                               char *func_buf, uptr func_siz,
-                               char *file_buf, uptr file_siz,
-                               int *line, int *col)
-                               SANITIZER_WEAK_ATTRIBUTE;
-
-bool __tsan_symbolize_external(uptr pc,
-                               char *func_buf, uptr func_siz,
-                               char *file_buf, uptr file_siz,
-                               int *line, int *col) {
+SANITIZER_WEAK_DEFAULT_IMPL
+bool __tsan_symbolize_external(uptr pc, char *func_buf, uptr func_siz,
+                               char *file_buf, uptr file_siz, int *line,
+                               int *col) {
   return false;
 }
 
@@ -77,7 +71,7 @@
   if (!Symbolizer::GetOrInit()->SymbolizeData(addr, &info))
     return 0;
   ReportLocation *ent = ReportLocation::New(ReportLocationGlobal);
-  ent->global = info;
+  internal_memcpy(&ent->global, &info, sizeof(info));
   return ent;
 }
 
diff --git a/lib/tsan/rtl/tsan_symbolize.h b/lib/tsan/rtl/tsan_symbolize.h
index b59b6cf..5a9710a 100644
--- a/lib/tsan/rtl/tsan_symbolize.h
+++ b/lib/tsan/rtl/tsan_symbolize.h
@@ -18,10 +18,6 @@
 
 namespace __tsan {
 
-// Denotes fake PC values that come from JIT/JAVA/etc.
-// For such PC values __tsan_symbolize_external() will be called.
-const uptr kExternalPCBit = 1ULL << 60;
-
 void EnterSymbolizer();
 void ExitSymbolizer();
 SymbolizedStack *SymbolizeCode(uptr addr);
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index 2d12cdf..f07ea3b 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -86,9 +86,9 @@
   void OnThreadIdle(ThreadState *thr);
 
  private:
-  static const u32 kFlagMask  = 3 << 30;
-  static const u32 kFlagBlock = 1 << 30;
-  static const u32 kFlagSync  = 2 << 30;
+  static const u32 kFlagMask  = 3u << 30;
+  static const u32 kFlagBlock = 1u << 30;
+  static const u32 kFlagSync  = 2u << 30;
   typedef DenseSlabAlloc<MBlock, 1<<16, 1<<12> BlockAlloc;
   typedef DenseSlabAlloc<SyncVar, 1<<16, 1<<10> SyncAlloc;
   BlockAlloc block_alloc_;
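
The `u` suffixes added above are not cosmetic: without them the shifts are performed on a signed int, and `2 << 30` / `3 << 30` shift a bit into or past the sign position, which is undefined behavior under the C++11 rules this code builds with. A short sketch of the tag-bits-in-the-top-of-a-u32 layout these constants implement:

    #include <cstdint>
    #include <cstdio>

    static const uint32_t kFlagMask  = 3u << 30;  // top two bits
    static const uint32_t kFlagBlock = 1u << 30;
    static const uint32_t kFlagSync  = 2u << 30;

    int main() {
      uint32_t idx = 12345;               // low 30 bits: index into the slab
      uint32_t tagged = idx | kFlagSync;  // top 2 bits: kind of the slot
      printf("is_sync=%d index=%u\n",
             (tagged & kFlagMask) == kFlagSync,
             (unsigned)(tagged & ~kFlagMask));
      return 0;
    }
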