Update compiler-rt for rebase to r212749.

Includes a cherry-pick of:
r213309 - fixes umodsi3

Change-Id: Ic7367e3586b6af7ef74bee6a8cf437d5f28d975a
diff --git a/CMakeLists.txt b/CMakeLists.txt
index da49e5e..f35a096 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,12 +61,8 @@
   # Windows where we need to use clang-cl instead.
   if(NOT MSVC)
     set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
-    set(COMPILER_RT_TEST_COMPILER_EXE "-o")
-    set(COMPILER_RT_TEST_COMPILER_OBJ "-o")
   else()
-    set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl.exe)
-    set(COMPILER_RT_TEST_COMPILER_EXE "-Fe")
-    set(COMPILER_RT_TEST_COMPILER_OBJ "-Fo")
+    set(COMPILER_RT_TEST_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang.exe)
   endif()
 else()
   # Take output dir and install path from the user.
@@ -80,8 +76,6 @@
   option(COMPILER_RT_ENABLE_WERROR "Fail and stop if warning is triggered" OFF)
   # Use a host compiler to compile/link tests.
   set(COMPILER_RT_TEST_COMPILER ${CMAKE_C_COMPILER} CACHE PATH "Compiler to use for testing")
-  set(COMPILER_RT_TEST_COMPILER_EXE "-o")
-  set(COMPILER_RT_TEST_COMPILER_OBJ "-o")
 
   if (NOT LLVM_CONFIG_PATH)
     find_program(LLVM_CONFIG_PATH "llvm-config"
@@ -131,7 +125,7 @@
 
 if("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang[+]*$")
   set(COMPILER_RT_TEST_COMPILER_ID Clang)
-elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang-cl.exe$")
+elseif("${COMPILER_RT_TEST_COMPILER}" MATCHES "clang.*[.]exe$")  # literal ".exe" suffix
   set(COMPILER_RT_TEST_COMPILER_ID Clang)
 else()
   set(COMPILER_RT_TEST_COMPILER_ID GNU)
@@ -142,6 +136,12 @@
     "Default triple for cross-compiled executables")
 string(REPLACE "-" ";" TARGET_TRIPLE_LIST ${COMPILER_RT_TEST_TARGET_TRIPLE})
 list(GET TARGET_TRIPLE_LIST 0 COMPILER_RT_TEST_TARGET_ARCH)
+list(GET TARGET_TRIPLE_LIST 1 COMPILER_RT_TEST_TARGET_OS)
+list(GET TARGET_TRIPLE_LIST 2 COMPILER_RT_TEST_TARGET_ABI)
+
+if ("${COMPILER_RT_TEST_TARGET_ABI}" STREQUAL "androideabi")
+  set(ANDROID 1)
+endif()
 
 string(TOLOWER ${CMAKE_SYSTEM_NAME} COMPILER_RT_OS_DIR)
 set(COMPILER_RT_LIBRARY_OUTPUT_DIR
@@ -205,14 +205,19 @@
   set(TARGET_${arch}_CFLAGS ${ARGN})
   try_compile(CAN_TARGET_${arch} ${CMAKE_BINARY_DIR} ${SIMPLE_SOURCE}
               COMPILE_DEFINITIONS "${TARGET_${arch}_CFLAGS}"
+              OUTPUT_VARIABLE TARGET_${arch}_OUTPUT
               CMAKE_FLAGS "-DCMAKE_EXE_LINKER_FLAGS:STRING=${TARGET_${arch}_CFLAGS}")
   if(${CAN_TARGET_${arch}})
     list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
+  elseif("${COMPILER_RT_TEST_TARGET_ARCH}" MATCHES "${arch}" OR
+         "${arch}" STREQUAL "arm_android")
+    # Bail out if we cannot target the architecture we plan to test.
+    message(FATAL_ERROR "Cannot compile for ${arch}:\n${TARGET_${arch}_OUTPUT}")
   endif()
 endmacro()
 
-if(ANDROID_COMMON_FLAGS)
-  test_target_arch(arm_android "${ANDROID_COMMON_FLAGS}")
+if(ANDROID)
+  test_target_arch(arm_android "")
 else()
   if("${LLVM_NATIVE_ARCH}" STREQUAL "X86")
     if (NOT MSVC)
@@ -221,8 +226,14 @@
     test_target_arch(i386 ${TARGET_32_BIT_CFLAGS})
   elseif("${LLVM_NATIVE_ARCH}" STREQUAL "PowerPC")
     test_target_arch(powerpc64 ${TARGET_64_BIT_CFLAGS})
-  elseif("${LLVM_NATIVE_ARCH}" STREQUAL "ARM")
-    test_target_arch(arm "")
+  elseif("${LLVM_NATIVE_ARCH}" STREQUAL "Mips")
+    test_target_arch(mips "")
+  endif()
+
+  # Build ARM libraries if we are configured to test on ARM
+  if("${COMPILER_RT_TEST_TARGET_ARCH}" MATCHES "arm|aarch64")
+    test_target_arch(arm "-march=armv7-a")
+    test_target_arch(aarch64 "-march=armv8-a")
   endif()
 endif()
 
@@ -353,14 +364,14 @@
 # Architectures supported by Sanitizer runtimes. Specific sanitizers may
 # support only subset of these (e.g. TSan works on x86_64 only).
 filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH
-  x86_64 i386 powerpc64 arm)
-filter_available_targets(ASAN_SUPPORTED_ARCH x86_64 i386 powerpc64)
+  x86_64 i386 powerpc64 arm aarch64 mips)
+filter_available_targets(ASAN_SUPPORTED_ARCH x86_64 i386 powerpc64 arm mips)
 filter_available_targets(DFSAN_SUPPORTED_ARCH x86_64)
 filter_available_targets(LSAN_SUPPORTED_ARCH x86_64)
 filter_available_targets(MSAN_SUPPORTED_ARCH x86_64)
-filter_available_targets(PROFILE_SUPPORTED_ARCH x86_64 i386 arm)
+filter_available_targets(PROFILE_SUPPORTED_ARCH x86_64 i386 arm aarch64)
 filter_available_targets(TSAN_SUPPORTED_ARCH x86_64)
-filter_available_targets(UBSAN_SUPPORTED_ARCH x86_64 i386)
+filter_available_targets(UBSAN_SUPPORTED_ARCH x86_64 i386 arm aarch64)
 
 add_subdirectory(include)
 
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 4b7ef51..6964eba 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -23,6 +23,10 @@
 E: hhinnant@apple.com
 D: Architect and primary author of compiler-rt
 
+N: Guan-Hong Liu
+E: koviankevin@hotmail.com
+D: IEEE Quad-precision functions
+
 N: Joerg Sonnenberger
 E: joerg@NetBSD.org
 D: Maintains NetBSD port.
diff --git a/README.txt b/README.txt
index 1c08e74..fc88432 100644
--- a/README.txt
+++ b/README.txt
@@ -9,335 +9,3 @@
 
 ================================
 
-This is a replacement library for libgcc.  Each function is contained
-in its own file.  Each function has a corresponding unit test under
-test/Unit.
-
-A rudimentary script to test each file is in the file called
-test/Unit/test.
-
-Here is the specification for this library:
-
-http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc
-
-Here is a synopsis of the contents of this library:
-
-typedef      int si_int;
-typedef unsigned su_int;
-
-typedef          long long di_int;
-typedef unsigned long long du_int;
-
-// Integral bit manipulation
-
-di_int __ashldi3(di_int a, si_int b);      // a << b
-ti_int __ashlti3(ti_int a, si_int b);      // a << b
-
-di_int __ashrdi3(di_int a, si_int b);      // a >> b  arithmetic (sign fill)
-ti_int __ashrti3(ti_int a, si_int b);      // a >> b  arithmetic (sign fill)
-di_int __lshrdi3(di_int a, si_int b);      // a >> b  logical    (zero fill)
-ti_int __lshrti3(ti_int a, si_int b);      // a >> b  logical    (zero fill)
-
-si_int __clzsi2(si_int a);  // count leading zeros
-si_int __clzdi2(di_int a);  // count leading zeros
-si_int __clzti2(ti_int a);  // count leading zeros
-si_int __ctzsi2(si_int a);  // count trailing zeros
-si_int __ctzdi2(di_int a);  // count trailing zeros
-si_int __ctzti2(ti_int a);  // count trailing zeros
-
-si_int __ffsdi2(di_int a);  // find least significant 1 bit
-si_int __ffsti2(ti_int a);  // find least significant 1 bit
-
-si_int __paritysi2(si_int a);  // bit parity
-si_int __paritydi2(di_int a);  // bit parity
-si_int __parityti2(ti_int a);  // bit parity
-
-si_int __popcountsi2(si_int a);  // bit population
-si_int __popcountdi2(di_int a);  // bit population
-si_int __popcountti2(ti_int a);  // bit population
-
-uint32_t __bswapsi2(uint32_t a);   // a byteswapped, arm only
-uint64_t __bswapdi2(uint64_t a);   // a byteswapped, arm only
-
-// Integral arithmetic
-
-di_int __negdi2    (di_int a);                         // -a
-ti_int __negti2    (ti_int a);                         // -a
-di_int __muldi3    (di_int a, di_int b);               // a * b
-ti_int __multi3    (ti_int a, ti_int b);               // a * b
-si_int __divsi3    (si_int a, si_int b);               // a / b   signed
-di_int __divdi3    (di_int a, di_int b);               // a / b   signed
-ti_int __divti3    (ti_int a, ti_int b);               // a / b   signed
-su_int __udivsi3   (su_int n, su_int d);               // a / b   unsigned
-du_int __udivdi3   (du_int a, du_int b);               // a / b   unsigned
-tu_int __udivti3   (tu_int a, tu_int b);               // a / b   unsigned
-si_int __modsi3    (si_int a, si_int b);               // a % b   signed
-di_int __moddi3    (di_int a, di_int b);               // a % b   signed
-ti_int __modti3    (ti_int a, ti_int b);               // a % b   signed
-su_int __umodsi3   (su_int a, su_int b);               // a % b   unsigned
-du_int __umoddi3   (du_int a, du_int b);               // a % b   unsigned
-tu_int __umodti3   (tu_int a, tu_int b);               // a % b   unsigned
-du_int __udivmoddi4(du_int a, du_int b, du_int* rem);  // a / b, *rem = a % b  unsigned
-tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);  // a / b, *rem = a % b  unsigned
-su_int __udivmodsi4(su_int a, su_int b, su_int* rem);  // a / b, *rem = a % b  unsigned
-si_int __divmodsi4(si_int a, si_int b, si_int* rem);   // a / b, *rem = a % b  signed
-
-
-
-//  Integral arithmetic with trapping overflow
-
-si_int __absvsi2(si_int a);           // abs(a)
-di_int __absvdi2(di_int a);           // abs(a)
-ti_int __absvti2(ti_int a);           // abs(a)
-
-si_int __negvsi2(si_int a);           // -a
-di_int __negvdi2(di_int a);           // -a
-ti_int __negvti2(ti_int a);           // -a
-
-si_int __addvsi3(si_int a, si_int b);  // a + b
-di_int __addvdi3(di_int a, di_int b);  // a + b
-ti_int __addvti3(ti_int a, ti_int b);  // a + b
-
-si_int __subvsi3(si_int a, si_int b);  // a - b
-di_int __subvdi3(di_int a, di_int b);  // a - b
-ti_int __subvti3(ti_int a, ti_int b);  // a - b
-
-si_int __mulvsi3(si_int a, si_int b);  // a * b
-di_int __mulvdi3(di_int a, di_int b);  // a * b
-ti_int __mulvti3(ti_int a, ti_int b);  // a * b
-
-
-// Integral arithmetic which returns if overflow
-
-si_int __mulosi4(si_int a, si_int b, int* overflow);  // a * b, overflow set to one if result not in signed range
-di_int __mulodi4(di_int a, di_int b, int* overflow);  // a * b, overflow set to one if result not in signed range
-ti_int __muloti4(ti_int a, ti_int b, int* overflow);  // a * b, overflow set to
- one if result not in signed range
-
-
-//  Integral comparison: a  < b -> 0
-//                       a == b -> 1
-//                       a  > b -> 2
-
-si_int __cmpdi2 (di_int a, di_int b);
-si_int __cmpti2 (ti_int a, ti_int b);
-si_int __ucmpdi2(du_int a, du_int b);
-si_int __ucmpti2(tu_int a, tu_int b);
-
-//  Integral / floating point conversion
-
-di_int __fixsfdi(      float a);
-di_int __fixdfdi(     double a);
-di_int __fixxfdi(long double a);
-
-ti_int __fixsfti(      float a);
-ti_int __fixdfti(     double a);
-ti_int __fixxfti(long double a);
-uint64_t __fixtfdi(long double input);  // ppc only, doesn't match documentation
-
-su_int __fixunssfsi(      float a);
-su_int __fixunsdfsi(     double a);
-su_int __fixunsxfsi(long double a);
-
-du_int __fixunssfdi(      float a);
-du_int __fixunsdfdi(     double a);
-du_int __fixunsxfdi(long double a);
-
-tu_int __fixunssfti(      float a);
-tu_int __fixunsdfti(     double a);
-tu_int __fixunsxfti(long double a);
-uint64_t __fixunstfdi(long double input);  // ppc only
-
-float       __floatdisf(di_int a);
-double      __floatdidf(di_int a);
-long double __floatdixf(di_int a);
-long double __floatditf(int64_t a);        // ppc only
-
-float       __floattisf(ti_int a);
-double      __floattidf(ti_int a);
-long double __floattixf(ti_int a);
-
-float       __floatundisf(du_int a);
-double      __floatundidf(du_int a);
-long double __floatundixf(du_int a);
-long double __floatunditf(uint64_t a);     // ppc only
-
-float       __floatuntisf(tu_int a);
-double      __floatuntidf(tu_int a);
-long double __floatuntixf(tu_int a);
-
-//  Floating point raised to integer power
-
-float       __powisf2(      float a, si_int b);  // a ^ b
-double      __powidf2(     double a, si_int b);  // a ^ b
-long double __powixf2(long double a, si_int b);  // a ^ b
-long double __powitf2(long double a, si_int b);  // ppc only, a ^ b
-
-//  Complex arithmetic
-
-//  (a + ib) * (c + id)
-
-      float _Complex __mulsc3( float a,  float b,  float c,  float d);
-     double _Complex __muldc3(double a, double b, double c, double d);
-long double _Complex __mulxc3(long double a, long double b,
-                              long double c, long double d);
-long double _Complex __multc3(long double a, long double b,
-                              long double c, long double d); // ppc only
-
-//  (a + ib) / (c + id)
-
-      float _Complex __divsc3( float a,  float b,  float c,  float d);
-     double _Complex __divdc3(double a, double b, double c, double d);
-long double _Complex __divxc3(long double a, long double b,
-                              long double c, long double d);
-long double _Complex __divtc3(long double a, long double b,
-                              long double c, long double d);  // ppc only
-
-
-//         Runtime support
-
-// __clear_cache() is used to tell process that new instructions have been
-// written to an address range.  Necessary on processors that do not have
-// a unified instruction and data cache.
-void __clear_cache(void* start, void* end);
-
-// __enable_execute_stack() is used with nested functions when a trampoline
-// function is written onto the stack and that page range needs to be made
-// executable.
-void __enable_execute_stack(void* addr);
-
-// __gcc_personality_v0() is normally only called by the system unwinder.
-// C code (as opposed to C++) normally does not need a personality function
-// because there are no catch clauses or destructors to be run.  But there
-// is a C language extension __attribute__((cleanup(func))) which marks local
-// variables as needing the cleanup function "func" to be run when the
-// variable goes out of scope.  That includes when an exception is thrown,
-// so a personality handler is needed.  
-_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions,
-         uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
-         _Unwind_Context_t context);
-
-// for use with some implementations of assert() in <assert.h>
-void __eprintf(const char* format, const char* assertion_expression,
-				const char* line, const char* file);
-				
-
-
-//   Power PC specific functions
-
-// There is no C interface to the saveFP/restFP functions.  They are helper
-// functions called by the prolog and epilog of functions that need to save
-// a number of non-volatile float point registers.  
-saveFP
-restFP
-
-// PowerPC has a standard template for trampoline functions.  This function
-// generates a custom trampoline function with the specific realFunc
-// and localsPtr values.
-void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, 
-                                const void* realFunc, void* localsPtr);
-
-// adds two 128-bit double-double precision values ( x + y )
-long double __gcc_qadd(long double x, long double y);  
-
-// subtracts two 128-bit double-double precision values ( x - y )
-long double __gcc_qsub(long double x, long double y); 
-
-// multiples two 128-bit double-double precision values ( x * y )
-long double __gcc_qmul(long double x, long double y);  
-
-// divides two 128-bit double-double precision values ( x / y )
-long double __gcc_qdiv(long double a, long double b);  
-
-
-//    ARM specific functions
-
-// There is no C interface to the switch* functions.  These helper functions
-// are only needed by Thumb1 code for efficient switch table generation.
-switch16
-switch32
-switch8
-switchu8
-
-// There is no C interface to the *_vfp_d8_d15_regs functions.  There are
-// called in the prolog and epilog of Thumb1 functions.  When the C++ ABI use
-// SJLJ for exceptions, each function with a catch clause or destuctors needs
-// to save and restore all registers in it prolog and epliog.  But there is 
-// no way to access vector and high float registers from thumb1 code, so the 
-// compiler must add call outs to these helper functions in the prolog and 
-// epilog.
-restore_vfp_d8_d15_regs
-save_vfp_d8_d15_regs
-
-
-// Note: long ago ARM processors did not have floating point hardware support.
-// Floating point was done in software and floating point parameters were 
-// passed in integer registers.  When hardware support was added for floating
-// point, new *vfp functions were added to do the same operations but with 
-// floating point parameters in floating point registers.
-
-// Undocumented functions
-
-float  __addsf3vfp(float a, float b);   // Appears to return a + b
-double __adddf3vfp(double a, double b); // Appears to return a + b
-float  __divsf3vfp(float a, float b);   // Appears to return a / b
-double __divdf3vfp(double a, double b); // Appears to return a / b
-int    __eqsf2vfp(float a, float b);    // Appears to return  one
-                                        //     iff a == b and neither is NaN.
-int    __eqdf2vfp(double a, double b);  // Appears to return  one
-                                        //     iff a == b and neither is NaN.
-double __extendsfdf2vfp(float a);       // Appears to convert from
-                                        //     float to double.
-int    __fixdfsivfp(double a);          // Appears to convert from
-                                        //     double to int.
-int    __fixsfsivfp(float a);           // Appears to convert from
-                                        //     float to int.
-unsigned int __fixunssfsivfp(float a);  // Appears to convert from
-                                        //     float to unsigned int.
-unsigned int __fixunsdfsivfp(double a); // Appears to convert from
-                                        //     double to unsigned int.
-double __floatsidfvfp(int a);           // Appears to convert from
-                                        //     int to double.
-float __floatsisfvfp(int a);            // Appears to convert from
-                                        //     int to float.
-double __floatunssidfvfp(unsigned int a); // Appears to convert from
-                                        //     unisgned int to double.
-float __floatunssisfvfp(unsigned int a); // Appears to convert from
-                                        //     unisgned int to float.
-int __gedf2vfp(double a, double b);     // Appears to return __gedf2
-                                        //     (a >= b)
-int __gesf2vfp(float a, float b);       // Appears to return __gesf2
-                                        //     (a >= b)
-int __gtdf2vfp(double a, double b);     // Appears to return __gtdf2
-                                        //     (a > b)
-int __gtsf2vfp(float a, float b);       // Appears to return __gtsf2
-                                        //     (a > b)
-int __ledf2vfp(double a, double b);     // Appears to return __ledf2
-                                        //     (a <= b)
-int __lesf2vfp(float a, float b);       // Appears to return __lesf2
-                                        //     (a <= b)
-int __ltdf2vfp(double a, double b);     // Appears to return __ltdf2
-                                        //     (a < b)
-int __ltsf2vfp(float a, float b);       // Appears to return __ltsf2
-                                        //     (a < b)
-double __muldf3vfp(double a, double b); // Appears to return a * b
-float __mulsf3vfp(float a, float b);    // Appears to return a * b
-int __nedf2vfp(double a, double b);     // Appears to return __nedf2
-                                        //     (a != b)
-double __negdf2vfp(double a);           // Appears to return -a
-float __negsf2vfp(float a);             // Appears to return -a
-float __negsf2vfp(float a);             // Appears to return -a
-double __subdf3vfp(double a, double b); // Appears to return a - b
-float __subsf3vfp(float a, float b);    // Appears to return a - b
-float __truncdfsf2vfp(double a);        // Appears to convert from
-                                        //     double to float.
-int __unorddf2vfp(double a, double b);  // Appears to return __unorddf2
-int __unordsf2vfp(float a, float b);    // Appears to return __unordsf2
-
-
-Preconditions are listed for each function at the definition when there are any.
-Any preconditions reflect the specification at
-http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc.
-
-Assumptions are listed in "int_lib.h", and in individual files.  Where possible
-assumptions are checked at compile time.
diff --git a/cmake/Modules/AddCompilerRT.cmake b/cmake/Modules/AddCompilerRT.cmake
index 69c30e3..0f6260a 100644
--- a/cmake/Modules/AddCompilerRT.cmake
+++ b/cmake/Modules/AddCompilerRT.cmake
@@ -113,15 +113,33 @@
     LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
 endmacro()
 
+set(COMPILER_RT_TEST_CFLAGS)
+
 # Unittests support.
 set(COMPILER_RT_GTEST_PATH ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest)
 set(COMPILER_RT_GTEST_SOURCE ${COMPILER_RT_GTEST_PATH}/src/gtest-all.cc)
 set(COMPILER_RT_GTEST_CFLAGS
   -DGTEST_NO_LLVM_RAW_OSTREAM=1
+  -DGTEST_HAS_RTTI=0
   -I${COMPILER_RT_GTEST_PATH}/include
   -I${COMPILER_RT_GTEST_PATH}
 )
 
+if(MSVC)
+  # clang doesn't support exceptions on Windows yet.
+  list(APPEND COMPILER_RT_TEST_CFLAGS
+       -D_HAS_EXCEPTIONS=0)
+
+  # We should teach clang to understand "#pragma intrinsic", see PR19898.
+  list(APPEND COMPILER_RT_TEST_CFLAGS -Wno-undefined-inline)
+
+  # Clang doesn't support SEH on Windows yet.
+  list(APPEND COMPILER_RT_GTEST_CFLAGS -DGTEST_HAS_SEH=0)
+
+  # gtest uses a lot of stuff marked as deprecated on Windows.
+  list(APPEND COMPILER_RT_GTEST_CFLAGS -Wno-deprecated-declarations)
+endif()
+
 # Link objects into a single executable with COMPILER_RT_TEST_COMPILER,
 # using specified link flags. Make executable a part of provided
 # test_suite.
@@ -147,9 +165,8 @@
     separate_arguments(TEST_LINK_FLAGS)
   endif()
   add_custom_target(${test_name}
-    # MSVS CL doesn't allow a space between -Fe and the output file name.
     COMMAND ${COMPILER_RT_TEST_COMPILER} ${TEST_OBJECTS}
-            ${COMPILER_RT_TEST_COMPILER_EXE}"${output_bin}"
+            -o "${output_bin}"
             ${TEST_LINK_FLAGS}
     DEPENDS ${TEST_DEPS})
   # Make the test suite depend on the binary.
diff --git a/cmake/Modules/CompilerRTCompile.cmake b/cmake/Modules/CompilerRTCompile.cmake
index 4885c82..2d1dd22 100644
--- a/cmake/Modules/CompilerRTCompile.cmake
+++ b/cmake/Modules/CompilerRTCompile.cmake
@@ -17,15 +17,21 @@
   else()
     string(REPLACE " " ";" global_flags "${CMAKE_C_FLAGS}")
   endif()
+  # On Windows, CMAKE_*_FLAGS are built for MSVC but we use the GCC-style
+  # clang.exe driver, which doesn't support flags starting with "/smth".
+  # Replace those with "-smth" equivalents.
+  if(MSVC)
+    string(REGEX REPLACE "^/" "-" global_flags "${global_flags}")
+    string(REPLACE ";/" ";-" global_flags "${global_flags}")
+  endif()
   # Ignore unknown warnings. CMAKE_CXX_FLAGS may contain GCC-specific options
   # which are not supported by Clang.
   list(APPEND global_flags -Wno-unknown-warning-option)
   set(compile_flags ${global_flags} ${SOURCE_CFLAGS})
   add_custom_command(
     OUTPUT ${object_file}
-    # MSVS CL doesn't allow a space between -Fo and the object file name.
     COMMAND ${COMPILER_RT_TEST_COMPILER} ${compile_flags} -c
-            ${COMPILER_RT_TEST_COMPILER_OBJ}"${object_file}"
+            -o "${object_file}"
             ${source_rpath}
     MAIN_DEPENDENCY ${source}
     DEPENDS ${SOURCE_DEPS})
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index b5d98a8..7f8664e 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(SANITIZER_HEADERS
+  sanitizer/allocator_interface.h
   sanitizer/asan_interface.h
   sanitizer/common_interface_defs.h
   sanitizer/dfsan_interface.h
diff --git a/include/sanitizer/allocator_interface.h b/include/sanitizer/allocator_interface.h
new file mode 100644
index 0000000..ab251f8
--- /dev/null
+++ b/include/sanitizer/allocator_interface.h
@@ -0,0 +1,66 @@
+//===-- allocator_interface.h ---------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Public interface header for allocator used in sanitizers (ASan/TSan/MSan).
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_ALLOCATOR_INTERFACE_H
+#define SANITIZER_ALLOCATOR_INTERFACE_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+  /* Returns the estimated number of bytes that will be reserved by allocator
+     for request of "size" bytes. If allocator can't allocate that much
+     memory, returns the maximal possible allocation size, otherwise returns
+     "size". */
+  size_t __sanitizer_get_estimated_allocated_size(size_t size);
+
+  /* Returns true if p was returned by the allocator and
+     is not yet freed. */
+  int __sanitizer_get_ownership(const volatile void *p);
+
+  /* Returns the number of bytes reserved for the pointer p.
+     Requires (__sanitizer_get_ownership(p) == true) or (p == 0). */
+  size_t __sanitizer_get_allocated_size(const volatile void *p);
+
+  /* Number of bytes, allocated and not yet freed by the application. */
+  size_t __sanitizer_get_current_allocated_bytes(void);
+
+  /* Number of bytes, mmaped by the allocator to fulfill allocation requests.
+     Generally, for request of X bytes, allocator can reserve and add to free
+     lists a large number of chunks of size X to use them for future requests.
+     All these chunks count toward the heap size. Currently, allocator never
+     releases memory to OS (instead, it just puts freed chunks to free
+     lists). */
+  size_t __sanitizer_get_heap_size(void);
+
+  /* Number of bytes, mmaped by the allocator, which can be used to fulfill
+     allocation requests. When a user program frees memory chunk, it can first
+     fall into quarantine and will count toward __sanitizer_get_free_bytes()
+     later. */
+  size_t __sanitizer_get_free_bytes(void);
+
+  /* Number of bytes in unmapped pages, that are released to OS. Currently,
+     always returns 0. */
+  size_t __sanitizer_get_unmapped_bytes(void);
+
+  /* Malloc hooks that may be optionally provided by user.
+     __sanitizer_malloc_hook(ptr, size) is called immediately after
+       allocation of "size" bytes, which returned "ptr".
+     __sanitizer_free_hook(ptr) is called immediately before
+       deallocation of "ptr". */
+  void __sanitizer_malloc_hook(const volatile void *ptr, size_t size);
+  void __sanitizer_free_hook(const volatile void *ptr);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif
diff --git a/include/sanitizer/asan_interface.h b/include/sanitizer/asan_interface.h
index d244346..23fc178 100644
--- a/include/sanitizer/asan_interface.h
+++ b/include/sanitizer/asan_interface.h
@@ -87,28 +87,42 @@
   // for request of "size" bytes. If ASan allocator can't allocate that much
   // memory, returns the maximal possible allocation size, otherwise returns
   // "size".
+  /* DEPRECATED: Use __sanitizer_get_estimated_allocated_size instead. */
   size_t __asan_get_estimated_allocated_size(size_t size);
+
   // Returns 1 if p was returned by the ASan allocator and is not yet freed.
   // Otherwise returns 0.
+  /* DEPRECATED: Use __sanitizer_get_ownership instead. */
   int __asan_get_ownership(const void *p);
+
   // Returns the number of bytes reserved for the pointer p.
   // Requires (get_ownership(p) == true) or (p == 0).
+  /* DEPRECATED: Use __sanitizer_get_allocated_size instead. */
   size_t __asan_get_allocated_size(const void *p);
+
   // Number of bytes, allocated and not yet freed by the application.
+  /* DEPRECATED: Use __sanitizer_get_current_allocated_bytes instead. */
   size_t __asan_get_current_allocated_bytes();
+
   // Number of bytes, mmaped by asan allocator to fulfill allocation requests.
   // Generally, for request of X bytes, allocator can reserve and add to free
   // lists a large number of chunks of size X to use them for future requests.
   // All these chunks count toward the heap size. Currently, allocator never
   // releases memory to OS (instead, it just puts freed chunks to free lists).
+  /* DEPRECATED: Use __sanitizer_get_heap_size instead. */
   size_t __asan_get_heap_size();
+
   // Number of bytes, mmaped by asan allocator, which can be used to fulfill
   // allocation requests. When a user program frees memory chunk, it can first
   // fall into quarantine and will count toward __asan_get_free_bytes() later.
+  /* DEPRECATED: Use __sanitizer_get_free_bytes instead. */
   size_t __asan_get_free_bytes();
+
   // Number of bytes in unmapped pages, that are released to OS. Currently,
   // always returns 0.
+  /* DEPRECATED: Use __sanitizer_get_unmapped_bytes instead. */
   size_t __asan_get_unmapped_bytes();
+
   // Prints accumulated stats to stderr. Used for debugging.
   void __asan_print_accumulated_stats();
 
@@ -121,6 +135,7 @@
   //   allocation of "size" bytes, which returned "ptr".
   // __asan_free_hook(ptr) is called immediately before
   //   deallocation of "ptr".
+  /* DEPRECATED: Use __sanitizer_malloc_hook / __sanitizer_free_hook instead. */
   void __asan_malloc_hook(void *ptr, size_t size);
   void __asan_free_hook(void *ptr);
 
diff --git a/include/sanitizer/msan_interface.h b/include/sanitizer/msan_interface.h
index e8c510b..f6a62be 100644
--- a/include/sanitizer/msan_interface.h
+++ b/include/sanitizer/msan_interface.h
@@ -89,8 +89,8 @@
      a string containing Msan runtime options. See msan_flags.h for details. */
   const char* __msan_default_options();
 
-  // Sets the callback to be called right before death on error.
-  // Passing 0 will unset the callback.
+  /* Sets the callback to be called right before death on error.
+     Passing 0 will unset the callback. */
   void __msan_set_death_callback(void (*callback)(void));
 
   /***********************************/
@@ -100,17 +100,21 @@
      for request of "size" bytes. If Msan allocator can't allocate that much
      memory, returns the maximal possible allocation size, otherwise returns
      "size". */
+  /* DEPRECATED: Use __sanitizer_get_estimated_allocated_size instead. */
   size_t __msan_get_estimated_allocated_size(size_t size);
 
   /* Returns true if p was returned by the Msan allocator and
      is not yet freed. */
+  /* DEPRECATED: Use __sanitizer_get_ownership instead. */
   int __msan_get_ownership(const volatile void *p);
 
   /* Returns the number of bytes reserved for the pointer p.
      Requires (get_ownership(p) == true) or (p == 0). */
+  /* DEPRECATED: Use __sanitizer_get_allocated_size instead. */
   size_t __msan_get_allocated_size(const volatile void *p);
 
   /* Number of bytes, allocated and not yet freed by the application. */
+  /* DEPRECATED: Use __sanitizer_get_current_allocated_bytes instead. */
   size_t __msan_get_current_allocated_bytes();
 
   /* Number of bytes, mmaped by msan allocator to fulfill allocation requests.
@@ -119,16 +123,19 @@
      All these chunks count toward the heap size. Currently, allocator never
      releases memory to OS (instead, it just puts freed chunks to free
      lists). */
+  /* DEPRECATED: Use __sanitizer_get_heap_size instead. */
   size_t __msan_get_heap_size();
 
   /* Number of bytes, mmaped by msan allocator, which can be used to fulfill
      allocation requests. When a user program frees memory chunk, it can first
      fall into quarantine and will count toward __msan_get_free_bytes()
      later. */
+  /* DEPRECATED: Use __sanitizer_get_free_bytes instead. */
   size_t __msan_get_free_bytes();
 
   /* Number of bytes in unmapped pages, that are released to OS. Currently,
      always returns 0. */
+  /* DEPRECATED: Use __sanitizer_get_unmapped_bytes instead. */
   size_t __msan_get_unmapped_bytes();
 
   /* Malloc hooks that may be optionally provided by user.
@@ -136,8 +143,10 @@
        allocation of "size" bytes, which returned "ptr".
      __msan_free_hook(ptr) is called immediately before
        deallocation of "ptr". */
+  /* DEPRECATED: Use __sanitizer_malloc_hook / __sanitizer_free_hook instead. */
   void __msan_malloc_hook(const volatile void *ptr, size_t size);
   void __msan_free_hook(const volatile void *ptr);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/lib/asan/Android.mk b/lib/asan/Android.mk
index 1007e98..77f273d 100644
--- a/lib/asan/Android.mk
+++ b/lib/asan/Android.mk
@@ -100,7 +100,7 @@
 #tests/asan_test_main.cc \
 
 asan_test_cflags := \
-	-mllvm -asan-blacklist=external/compiler-rt/lib/asan/tests/asan_test.ignore \
+	-fsanitize-blacklist=external/compiler-rt/lib/asan/tests/asan_test.ignore \
 	-DASAN_LOW_MEMORY=1 \
 	-DASAN_UAR=0 \
 	-DASAN_NEEDS_SEGV=$(ASAN_NEEDS_SEGV) \
diff --git a/lib/asan/CMakeLists.txt b/lib/asan/CMakeLists.txt
index ea3c720..b23a7a2 100644
--- a/lib/asan/CMakeLists.txt
+++ b/lib/asan/CMakeLists.txt
@@ -61,12 +61,6 @@
 append_if(COMPILER_RT_HAS_LIBPTHREAD pthread ASAN_DYNAMIC_LIBS)
 append_if(COMPILER_RT_HAS_LIBDL dl ASAN_DYNAMIC_LIBS)
 
-if (NOT MSVC)
-  set(ASAN_ASM_SOURCES asan_asm_instrumentation.S)
-  set_source_files_properties(${ASAN_ASM_SOURCES} PROPERTIES LANGUAGE C)
-  list(APPEND ASAN_SOURCES ${ASAN_ASM_SOURCES})
-endif()
-
 # Compile ASan sources into an object library.
 if(APPLE)
   foreach(os ${SANITIZER_COMMON_SUPPORTED_DARWIN_OS})
@@ -127,6 +121,9 @@
     COMPILE_DEFINITIONS ${ASAN_COMMON_DEFINITIONS})
   target_link_libraries(clang_rt.asan-arm-android dl log)
   add_dependencies(asan clang_rt.asan-arm-android)
+  install(TARGETS clang_rt.asan-arm-android
+          ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
+          LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
 else()
   # Build separate libraries for each target.
   foreach(arch ${ASAN_SUPPORTED_ARCH})
diff --git a/lib/asan/README.txt b/lib/asan/README.txt
index e4f4961..b9c43ac 100644
--- a/lib/asan/README.txt
+++ b/lib/asan/README.txt
@@ -1,16 +1,15 @@
 AddressSanitizer RT
 ================================
-This directory contains sources of the AddressSanitizer (asan) run-time library.
+This directory contains sources of the AddressSanitizer (asan) runtime library.
 We are in the process of integrating AddressSanitizer with LLVM, stay tuned.
 
-Directory structre:
+Directory structure:
 README.txt       : This file.
 Makefile.mk      : File for make-based build.
 CMakeLists.txt   : File for cmake-based build.
-asan_*.{cc,h}    : Sources of the asan run-time lirbary.
+asan_*.{cc,h}    : Sources of the asan runtime library.
 scripts/*        : Helper scripts.
 tests/*          : ASan unit tests.
-lit_tests/*      : ASan output tests.
 
 Also ASan runtime needs the following libraries:
 lib/interception/      : Machinery used to intercept function calls.
diff --git a/lib/asan/asan_allocator2.cc b/lib/asan/asan_allocator2.cc
index b6513b2..f07b0f0 100644
--- a/lib/asan/asan_allocator2.cc
+++ b/lib/asan/asan_allocator2.cc
@@ -21,6 +21,7 @@
 #include "asan_report.h"
 #include "asan_stack.h"
 #include "asan_thread.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_flags.h"
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_list.h"
@@ -760,26 +761,35 @@
 
 // ASan allocator doesn't reserve extra bytes, so normally we would
 // just return "size". We don't want to expose our redzone sizes, etc here.
-uptr __asan_get_estimated_allocated_size(uptr size) {
+uptr __sanitizer_get_estimated_allocated_size(uptr size) {
   return size;
 }
+uptr __asan_get_estimated_allocated_size(uptr size) {
+  return __sanitizer_get_estimated_allocated_size(size);
+}
 
-int __asan_get_ownership(const void *p) {
+int __sanitizer_get_ownership(const void *p) {
   uptr ptr = reinterpret_cast<uptr>(p);
   return (AllocationSize(ptr) > 0);
 }
+int __asan_get_ownership(const void *p) {
+  return __sanitizer_get_ownership(p);
+}
 
-uptr __asan_get_allocated_size(const void *p) {
+uptr __sanitizer_get_allocated_size(const void *p) {
   if (p == 0) return 0;
   uptr ptr = reinterpret_cast<uptr>(p);
   uptr allocated_size = AllocationSize(ptr);
   // Die if p is not malloced or if it is already freed.
   if (allocated_size == 0) {
     GET_STACK_TRACE_FATAL_HERE;
-    ReportAsanGetAllocatedSizeNotOwned(ptr, &stack);
+    ReportSanitizerGetAllocatedSizeNotOwned(ptr, &stack);
   }
   return allocated_size;
 }
+uptr __asan_get_allocated_size(const void *p) {
+  return __sanitizer_get_allocated_size(p);
+}
 
 #if !SANITIZER_SUPPORTS_WEAK_HOOKS
 // Provide default (no-op) implementation of malloc hooks.
@@ -793,5 +803,14 @@
 void __asan_free_hook(void *ptr) {
   (void)ptr;
 }
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_malloc_hook(void *ptr, uptr size) {
+  (void)ptr;
+  (void)size;
+}
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+void __sanitizer_free_hook(void *ptr) {
+  (void)ptr;
+}
 }  // extern "C"
 #endif
diff --git a/lib/asan/asan_asm_instrumentation.S b/lib/asan/asan_asm_instrumentation.S
deleted file mode 100644
index 2f812e7..0000000
--- a/lib/asan/asan_asm_instrumentation.S
+++ /dev/null
@@ -1,601 +0,0 @@
-// This file was generated by gen_asm_instrumentation.sh. Please, do not edit
-// manually.
-#ifdef __linux__
-.section .text
-#if defined(__x86_64__) || defined(__i386__)
-.globl __asan_report_store1
-.globl __asan_report_load1
-.globl __asan_report_store2
-.globl __asan_report_load2
-.globl __asan_report_store4
-.globl __asan_report_load4
-.globl __asan_report_store8
-.globl __asan_report_load8
-.globl __asan_report_store16
-.globl __asan_report_load16
-#endif //  defined(__x86_64__) || defined(__i386__)
-#if defined(__i386__)
-// Sanitize 1-byte store. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_store1
-.type __sanitizer_sanitize_store1, @function
-__sanitizer_sanitize_store1:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je .sanitize_store1_done
-  movl %eax, %edx
-  andl $0x7, %edx
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl .sanitize_store1_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_store1@PLT
-.sanitize_store1_done:
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 1-byte load. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_load1
-.type __sanitizer_sanitize_load1, @function
-__sanitizer_sanitize_load1:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je .sanitize_load1_done
-  movl %eax, %edx
-  andl $0x7, %edx
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl .sanitize_load1_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_load1@PLT
-.sanitize_load1_done:
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 2-byte store. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_store2
-.type __sanitizer_sanitize_store2, @function
-__sanitizer_sanitize_store2:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je .sanitize_store2_done
-  movl %eax, %edx
-  andl $0x7, %edx
-  incl %edx
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl .sanitize_store2_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_store2@PLT
-.sanitize_store2_done:
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 2-byte load. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_load2
-.type __sanitizer_sanitize_load2, @function
-__sanitizer_sanitize_load2:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je .sanitize_load2_done
-  movl %eax, %edx
-  andl $0x7, %edx
-  incl %edx
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl .sanitize_load2_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_load2@PLT
-.sanitize_load2_done:
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 4-byte store. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_store4
-.type __sanitizer_sanitize_store4, @function
-__sanitizer_sanitize_store4:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je .sanitize_store4_done
-  movl %eax, %edx
-  andl $0x7, %edx
-  addl $0x3, %edx
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl .sanitize_store4_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_store4@PLT
-.sanitize_store4_done:
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 4-byte load. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_load4
-.type __sanitizer_sanitize_load4, @function
-__sanitizer_sanitize_load4:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je .sanitize_load4_done
-  movl %eax, %edx
-  andl $0x7, %edx
-  addl $0x3, %edx
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl .sanitize_load4_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_load4@PLT
-.sanitize_load4_done:
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 8-byte store. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_store8
-.type __sanitizer_sanitize_store8, @function
-__sanitizer_sanitize_store8:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  cmpb $0x0, 0x20000000(%ecx)
-  je .sanitize_store8_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_store8@PLT
-.sanitize_store8_done:
-  popfl
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 8-byte load. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_load8
-.type __sanitizer_sanitize_load8, @function
-__sanitizer_sanitize_load8:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  cmpb $0x0, 0x20000000(%ecx)
-  je .sanitize_load8_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_load8@PLT
-.sanitize_load8_done:
-  popfl
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 16-byte store. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_store16
-.type __sanitizer_sanitize_store16, @function
-__sanitizer_sanitize_store16:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  cmpw $0x0, 0x20000000(%ecx)
-  je .sanitize_store16_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_store16@PLT
-.sanitize_store16_done:
-  popfl
-  popl %ecx
-  popl %eax
-  leave
-  ret
-// Sanitize 16-byte load. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl __sanitizer_sanitize_load16
-.type __sanitizer_sanitize_load16, @function
-__sanitizer_sanitize_load16:
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl $0x3, %ecx
-  cmpw $0x0, 0x20000000(%ecx)
-  je .sanitize_load16_done
-  pushl %eax
-  cld
-  emms
-  call __asan_report_load16@PLT
-.sanitize_load16_done:
-  popfl
-  popl %ecx
-  popl %eax
-  leave
-  ret
-#endif // defined(__i386__)
-#if defined(__x86_64__)
-// Sanitize 1-byte store. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_store1
-.type __sanitizer_sanitize_store1, @function
-__sanitizer_sanitize_store1:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je .sanitize_store1_done
-  movl %edi, %ecx
-  andl $0x7, %ecx
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl .sanitize_store1_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_store1@PLT
-.sanitize_store1_done:
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 1-byte load. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_load1
-.type __sanitizer_sanitize_load1, @function
-__sanitizer_sanitize_load1:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je .sanitize_load1_done
-  movl %edi, %ecx
-  andl $0x7, %ecx
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl .sanitize_load1_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_load1@PLT
-.sanitize_load1_done:
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 2-byte store. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_store2
-.type __sanitizer_sanitize_store2, @function
-__sanitizer_sanitize_store2:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je .sanitize_store2_done
-  movl %edi, %ecx
-  andl $0x7, %ecx
-  incl %ecx
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl .sanitize_store2_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_store2@PLT
-.sanitize_store2_done:
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 2-byte load. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_load2
-.type __sanitizer_sanitize_load2, @function
-__sanitizer_sanitize_load2:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je .sanitize_load2_done
-  movl %edi, %ecx
-  andl $0x7, %ecx
-  incl %ecx
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl .sanitize_load2_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_load2@PLT
-.sanitize_load2_done:
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 4-byte store. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_store4
-.type __sanitizer_sanitize_store4, @function
-__sanitizer_sanitize_store4:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je .sanitize_store4_done
-  movl %edi, %ecx
-  andl $0x7, %ecx
-  addl $0x3, %ecx
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl .sanitize_store4_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_store4@PLT
-.sanitize_store4_done:
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 4-byte load. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_load4
-.type __sanitizer_sanitize_load4, @function
-__sanitizer_sanitize_load4:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je .sanitize_load4_done
-  movl %edi, %ecx
-  andl $0x7, %ecx
-  addl $0x3, %ecx
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl .sanitize_load4_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_load4@PLT
-.sanitize_load4_done:
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 8-byte store. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_store8
-.type __sanitizer_sanitize_store8, @function
-__sanitizer_sanitize_store8:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  cmpb $0x0, 0x7fff8000(%rax)
-  je .sanitize_store8_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_store8@PLT
-.sanitize_store8_done:
-  popfq
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 8-byte load. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_load8
-.type __sanitizer_sanitize_load8, @function
-__sanitizer_sanitize_load8:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  cmpb $0x0, 0x7fff8000(%rax)
-  je .sanitize_load8_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_load8@PLT
-.sanitize_load8_done:
-  popfq
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 16-byte store. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_store16
-.type __sanitizer_sanitize_store16, @function
-__sanitizer_sanitize_store16:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  cmpw $0x0, 0x7fff8000(%rax)
-  je .sanitize_store16_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_store16@PLT
-.sanitize_store16_done:
-  popfq
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-// Sanitize 16-byte load. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl __sanitizer_sanitize_load16
-.type __sanitizer_sanitize_load16, @function
-__sanitizer_sanitize_load16:
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushfq
-  movq %rdi, %rax
-  shrq $0x3, %rax
-  cmpw $0x0, 0x7fff8000(%rax)
-  je .sanitize_load16_done
-  subq $8, %rsp
-  andq $-16, %rsp
-  cld
-  emms
-  call __asan_report_load16@PLT
-.sanitize_load16_done:
-  popfq
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-#endif // defined(__x86_64__)
-/* We do not need executable stack. */
-#if defined(__arm__)
-  .section .note.GNU-stack,"",%progbits
-#else
-  .section .note.GNU-stack,"",@progbits
-#endif // defined(__arm__)
-#endif // __linux__
diff --git a/lib/asan/asan_dll_thunk.cc b/lib/asan/asan_dll_thunk.cc
index 40d0e5d..a3fbb27 100644
--- a/lib/asan/asan_dll_thunk.cc
+++ b/lib/asan/asan_dll_thunk.cc
@@ -20,6 +20,7 @@
 // Using #ifdef rather than relying on Makefiles etc.
 // simplifies the build procedure.
 #ifdef ASAN_DLL_THUNK
+#include "asan_init_version.h"
 #include "sanitizer_common/sanitizer_interception.h"
 
 // ---------- Function interception helper functions and macros ----------- {{{1
@@ -74,7 +75,10 @@
 // Special case of hooks -- ASan own interface functions.  Those are only called
 // after __asan_init, thus an empty implementation is sufficient.
 #define INTERFACE_FUNCTION(name)                                               \
-  extern "C" void name() { __debugbreak(); }                                   \
+  extern "C" void name() {                                                     \
+    volatile int prevent_icf = (__LINE__ << 8); (void)prevent_icf;             \
+    __debugbreak();                                                            \
+  }                                                                            \
   INTERCEPT_WHEN_POSSIBLE(#name, name)
 
 // INTERCEPT_HOOKS must be used after the last INTERCEPT_WHEN_POSSIBLE.
@@ -200,13 +204,13 @@
 
   // Manually wrap __asan_init as we need to initialize
   // __asan_option_detect_stack_use_after_return afterwards.
-  void __asan_init_v3() {
+  void __asan_init() {
     typedef void (*fntype)();
     static fntype fn = 0;
-    // __asan_init_v3 is expected to be called by only one thread.
+    // __asan_init is expected to be called by only one thread.
     if (fn) return;
 
-    fn = (fntype)getRealProcAddressOrDie("__asan_init_v3");
+    fn = (fntype)getRealProcAddressOrDie(__asan_init_name);
     fn();
     __asan_option_detect_stack_use_after_return =
         (__asan_should_detect_stack_use_after_return() != 0);
@@ -273,6 +277,8 @@
 INTERFACE_FUNCTION(__asan_stack_free_9)
 INTERFACE_FUNCTION(__asan_stack_free_10)
 
+INTERFACE_FUNCTION(__sanitizer_cov_module_init)
+
 // TODO(timurrrr): Add more interface functions on the as-needed basis.
 
 // ----------------- Memory allocation functions ---------------------
@@ -320,9 +326,24 @@
 INTERCEPT_LIBRARY_FUNCTION(strtol);
 INTERCEPT_LIBRARY_FUNCTION(wcslen);
 
-// Must be at the end of the file due to the way INTERCEPT_HOOKS is defined.
+// Must be after all the interceptor declarations due to the way INTERCEPT_HOOKS
+// is defined.
 void InterceptHooks() {
   INTERCEPT_HOOKS();
 }
 
+// We want to call __asan_init before C/C++ initializers/constructors are
+// executed, otherwise functions like memset might be invoked.
+// For some strange reason, merely linking in asan_preinit.cc doesn't work
+// as the callback is never called...  Is link.exe doing something too smart?
+
+// In DLLs, the callbacks are expected to return 0,
+// otherwise CRT initialization fails.
+static int call_asan_init() {
+  __asan_init();
+  return 0;
+}
+#pragma section(".CRT$XIB", long, read)  // NOLINT
+__declspec(allocate(".CRT$XIB")) int (*__asan_preinit)() = call_asan_init;
+
 #endif // ASAN_DLL_THUNK
diff --git a/lib/asan/asan_globals.cc b/lib/asan/asan_globals.cc
index cecabc0..a844201 100644
--- a/lib/asan/asan_globals.cc
+++ b/lib/asan/asan_globals.cc
@@ -22,6 +22,7 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
 
 namespace __asan {
 
@@ -45,6 +46,14 @@
 // Lazy-initialized and never deleted.
 static VectorOfGlobals *dynamic_init_globals;
 
+// We want to remember where a certain range of globals was registered.
+struct GlobalRegistrationSite {
+  u32 stack_id;  // StackDepotPut() id of the registering call stack.
+  Global *g_first, *g_last;  // First and last global of the range (inclusive).
+};
+typedef InternalMmapVector<GlobalRegistrationSite> GlobalRegistrationSiteVector;
+static GlobalRegistrationSiteVector *global_registration_site_vector;
+
 ALWAYS_INLINE void PoisonShadowForGlobal(const Global *g, u8 value) {
   FastPoisonShadow(g->beg, g->size_with_redzone, value);
 }
@@ -63,8 +72,8 @@
 }
 
 static void ReportGlobal(const Global &g, const char *prefix) {
-  Report("%s Global: beg=%p size=%zu/%zu name=%s module=%s dyn_init=%zu\n",
-         prefix, (void*)g.beg, g.size, g.size_with_redzone, g.name,
+  Report("%s Global[%p]: beg=%p size=%zu/%zu name=%s module=%s dyn_init=%zu\n",
+         prefix, &g, (void*)g.beg, g.size, g.size_with_redzone, g.name,
          g.module_name, g.has_dynamic_init);
 }
 
@@ -81,6 +90,16 @@
   return res;
 }
 
+// Returns the stack id of the registration range containing g, or 0 if none.
+u32 FindRegistrationSite(const Global *g) {
+  CHECK(global_registration_site_vector);
+  GlobalRegistrationSiteVector &sites = *global_registration_site_vector;
+  for (uptr idx = 0, count = sites.size(); idx < count; idx++)
+    if (sites[idx].g_first <= g && g <= sites[idx].g_last)
+      return sites[idx].stack_id;
+  return 0;
+}
+
 // Register a global variable.
 // This function may be called more than once for every global
 // so we store the globals in a map.
@@ -101,7 +120,8 @@
       for (ListOfGlobals *l = list_of_all_globals; l; l = l->next) {
         if (g->beg == l->g->beg &&
             (flags()->detect_odr_violation >= 2 || g->size != l->g->size))
-          ReportODRViolation(g, l->g);
+          ReportODRViolation(g, FindRegistrationSite(g),
+                             l->g, FindRegistrationSite(l->g));
       }
     }
   }
@@ -157,7 +177,18 @@
 // Register an array of globals.
 void __asan_register_globals(__asan_global *globals, uptr n) {
   if (!flags()->report_globals) return;
+  GET_STACK_TRACE_FATAL_HERE;
+  u32 stack_id = StackDepotPut(stack.trace, stack.size);
   BlockingMutexLock lock(&mu_for_globals);
+  if (!global_registration_site_vector)
+    global_registration_site_vector =
+        new(allocator_for_globals) GlobalRegistrationSiteVector(128);
+  GlobalRegistrationSite site = {stack_id, &globals[0], &globals[n - 1]};
+  global_registration_site_vector->push_back(site);
+  if (flags()->report_globals >= 2) {
+    PRINT_CURRENT_STACK();
+    Printf("=== ID %d; %p %p\n", stack_id, &globals[0], &globals[n - 1]);
+  }
   for (uptr i = 0; i < n; i++) {
     RegisterGlobal(&globals[i]);
   }
diff --git a/lib/asan/asan_init_version.h b/lib/asan/asan_init_version.h
new file mode 100644
index 0000000..88eb80f
--- /dev/null
+++ b/lib/asan/asan_init_version.h
@@ -0,0 +1,37 @@
+//===-- asan_init_version.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+//
+// This header defines a versioned __asan_init function to be called at the
+// startup of the instrumented program.
+//===----------------------------------------------------------------------===//
+#ifndef ASAN_INIT_VERSION_H
+#define ASAN_INIT_VERSION_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+extern "C" {
+  // This function should be called at the very beginning of the process,
+  // before any instrumented code is executed and before any call to malloc.
+  // Every time the ASan ABI changes we also change the version number in this
+  // name. Objects built with an incompatible ASan ABI version will not link
+  // with the run-time.
+  // Changes between ABI versions:
+  // v1=>v2: added 'module_name' to __asan_global
+  // v2=>v3: stack frame description (created by the compiler)
+  //         contains the function PC as the 3rd field (see
+  //         DescribeAddressIfStack).
+  // v3=>v4: added '__asan_global_source_location' to __asan_global.
+  SANITIZER_INTERFACE_ATTRIBUTE void __asan_init_v4();
+  #define __asan_init __asan_init_v4
+  #define __asan_init_name "__asan_init_v4"
+}
+
+#endif  // ASAN_INIT_VERSION_H
diff --git a/lib/asan/asan_interceptors.cc b/lib/asan/asan_interceptors.cc
index 9dccddf..4ae03ec 100644
--- a/lib/asan/asan_interceptors.cc
+++ b/lib/asan/asan_interceptors.cc
@@ -697,6 +697,16 @@
 }
 #endif  // ASAN_INTERCEPT___CXA_ATEXIT
 
+#if ASAN_INTERCEPT_FORK
+INTERCEPTOR(int, fork, void) {
+  ENSURE_ASAN_INITED();
+  if (common_flags()->coverage) CovBeforeFork();
+  int pid = REAL(fork)();
+  if (common_flags()->coverage) CovAfterFork(pid);
+  return pid;
+}
+#endif  // ASAN_INTERCEPT_FORK
+
 #if SANITIZER_WINDOWS
 INTERCEPTOR_WINAPI(DWORD, CreateThread,
                    void* security, uptr stack_size,
@@ -808,6 +818,10 @@
   ASAN_INTERCEPT_FUNC(__cxa_atexit);
 #endif
 
+#if ASAN_INTERCEPT_FORK
+  ASAN_INTERCEPT_FUNC(fork);
+#endif
+
   // Some Windows-specific interceptors.
 #if SANITIZER_WINDOWS
   InitializeWindowsInterceptors();
diff --git a/lib/asan/asan_interceptors.h b/lib/asan/asan_interceptors.h
index 3b7b265..c5d1af0 100644
--- a/lib/asan/asan_interceptors.h
+++ b/lib/asan/asan_interceptors.h
@@ -27,6 +27,7 @@
 # define ASAN_INTERCEPT_INDEX 1
 # define ASAN_INTERCEPT_PTHREAD_CREATE 1
 # define ASAN_INTERCEPT_MLOCKX 1
+# define ASAN_INTERCEPT_FORK 1
 #else
 # define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0
 # define ASAN_INTERCEPT__LONGJMP 0
@@ -34,6 +35,7 @@
 # define ASAN_INTERCEPT_INDEX 0
 # define ASAN_INTERCEPT_PTHREAD_CREATE 0
 # define ASAN_INTERCEPT_MLOCKX 0
+# define ASAN_INTERCEPT_FORK 0
 #endif
 
 #if SANITIZER_FREEBSD || SANITIZER_LINUX
@@ -66,7 +68,9 @@
 # define ASAN_INTERCEPT_SIGLONGJMP 0
 #endif
 
-#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS
+// Android bug: https://code.google.com/p/android/issues/detail?id=61799
+#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && \
+    !(SANITIZER_ANDROID && defined(__i386))
 # define ASAN_INTERCEPT___CXA_THROW 1
 #else
 # define ASAN_INTERCEPT___CXA_THROW 0
diff --git a/lib/asan/asan_interface_internal.h b/lib/asan/asan_interface_internal.h
index 84525d0..32629ea 100644
--- a/lib/asan/asan_interface_internal.h
+++ b/lib/asan/asan_interface_internal.h
@@ -17,21 +17,18 @@
 
 #include "sanitizer_common/sanitizer_internal_defs.h"
 
+#include "asan_init_version.h"
+
 using __sanitizer::uptr;
 
 extern "C" {
-  // This function should be called at the very beginning of the process,
-  // before any instrumented code is executed and before any call to malloc.
-  // Every time the asan ABI changes we also change the version number in this
-  // name. Objects build with incompatible asan ABI version
-  // will not link with run-time.
-  // Changes between ABI versions:
-  // v1=>v2: added 'module_name' to __asan_global
-  // v2=>v3: stack frame description (created by the compiler)
-  //         contains the function PC as the 3-rd field (see
-  //         DescribeAddressIfStack).
-  SANITIZER_INTERFACE_ATTRIBUTE void __asan_init_v3();
-  #define __asan_init __asan_init_v3
+  // This structure describes the source location at which a global was
+  // defined.
+  struct __asan_global_source_location {
+    const char *filename;
+    int line_no;
+    int column_no;
+  };
 
   // This structure describes an instrumented global variable.
   struct __asan_global {
@@ -42,6 +39,8 @@
     const char *module_name; // Module name as a C string. This pointer is a
                              // unique identifier of a module.
     uptr has_dynamic_init;   // Non-zero if the global has dynamic initializer.
+    __asan_global_source_location *location;  // Source location of a global,
+                                              // or NULL if it is unknown.
   };
 
   // These two functions should be called by the instrumented code.
@@ -99,24 +98,26 @@
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   /* OPTIONAL */ void __asan_on_error();
 
+  // ---------------------------
+  // FIXME: Replace these functions with __sanitizer equivalent.
   SANITIZER_INTERFACE_ATTRIBUTE
   uptr __asan_get_estimated_allocated_size(uptr size);
-
   SANITIZER_INTERFACE_ATTRIBUTE int __asan_get_ownership(const void *p);
   SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_get_allocated_size(const void *p);
   SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_get_current_allocated_bytes();
   SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_get_heap_size();
   SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_get_free_bytes();
   SANITIZER_INTERFACE_ATTRIBUTE uptr __asan_get_unmapped_bytes();
-  SANITIZER_INTERFACE_ATTRIBUTE void __asan_print_accumulated_stats();
-
-  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
-  /* OPTIONAL */ const char* __asan_default_options();
-
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   /* OPTIONAL */ void __asan_malloc_hook(void *ptr, uptr size);
   SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
   /* OPTIONAL */ void __asan_free_hook(void *ptr);
+  // ---------------------------
+
+  SANITIZER_INTERFACE_ATTRIBUTE void __asan_print_accumulated_stats();
+
+  SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+  /* OPTIONAL */ const char* __asan_default_options();
 
   // Global flag, copy of ASAN_OPTIONS=detect_stack_use_after_return
   SANITIZER_INTERFACE_ATTRIBUTE
diff --git a/lib/asan/asan_internal.h b/lib/asan/asan_internal.h
index 650a4d1..0782789 100644
--- a/lib/asan/asan_internal.h
+++ b/lib/asan/asan_internal.h
@@ -98,6 +98,8 @@
 
 void ParseExtraActivationFlags();
 
+void *AsanDlSymNext(const char *sym);
+
 // Platform-specific options.
 #if SANITIZER_MAC
 bool PlatformHasDifferentMemcpyAndMemmove();
@@ -110,9 +112,18 @@
 // Add convenient macro for interface functions that may be represented as
 // weak hooks.
+// Each hook macro calls the legacy __asan_* hook and then the __sanitizer_*
+// hook, each only when its symbol has a non-null address. The do/while (0)
+// wrapper makes the expansion one statement, safe in unbraced if/else bodies.
 #define ASAN_MALLOC_HOOK(ptr, size) \
-  if (&__asan_malloc_hook) __asan_malloc_hook(ptr, size)
+  do { \
+    if (&__asan_malloc_hook) __asan_malloc_hook(ptr, size); \
+    if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(ptr, size); \
+  } while (0)
 #define ASAN_FREE_HOOK(ptr) \
-  if (&__asan_free_hook) __asan_free_hook(ptr)
+  do { \
+    if (&__asan_free_hook) __asan_free_hook(ptr); \
+    if (&__sanitizer_free_hook) __sanitizer_free_hook(ptr); \
+  } while (0)
 #define ASAN_ON_ERROR() \
   if (&__asan_on_error) __asan_on_error()
 
diff --git a/lib/asan/asan_linux.cc b/lib/asan/asan_linux.cc
index fce9d1c..fdd009c 100644
--- a/lib/asan/asan_linux.cc
+++ b/lib/asan/asan_linux.cc
@@ -19,6 +19,7 @@
 #include "asan_internal.h"
 #include "asan_thread.h"
 #include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_freebsd.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_procmaps.h"
 
@@ -27,6 +28,7 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
+#include <dlfcn.h>
 #include <fcntl.h>
 #include <pthread.h>
 #include <stdio.h>
@@ -42,19 +44,14 @@
 extern "C" void* _DYNAMIC;
 #else
 #include <sys/ucontext.h>
-#include <dlfcn.h>
 #include <link.h>
 #endif
 
-// x86_64 FreeBSD 9.2 and older define 64-bit register names in both 64-bit
-// and 32-bit modes.
-#if SANITIZER_FREEBSD
-#include <sys/param.h>
-# if __FreeBSD_version <= 902001  // v9.2
-#  define mc_eip mc_rip
-#  define mc_ebp mc_rbp
-#  define mc_esp mc_rsp
-# endif
+// x86-64 FreeBSD 9.2 and older define 'ucontext_t' incorrectly in
+// 32-bit mode.
+#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32) && \
+  __FreeBSD_version <= 902001  // v9.2
+#define ucontext_t xucontext_t
 #endif
 
 typedef enum {
@@ -91,6 +88,10 @@
   if (!info->dlpi_name || info->dlpi_name[0] == 0)
     return 0;
 
+  // Ignore vDSO
+  if (internal_strncmp(info->dlpi_name, "linux-", sizeof("linux-") - 1) == 0)
+    return 0;
+
   *(const char **)data = info->dlpi_name;
   return 1;
 }
@@ -188,6 +189,13 @@
   *bp = ucontext->uc_mcontext.gregs[REG_EBP];
   *sp = ucontext->uc_mcontext.gregs[REG_ESP];
 # endif
+#elif defined(__powerpc__) || defined(__powerpc64__)
+  ucontext_t *ucontext = (ucontext_t*)context;
+  *pc = ucontext->uc_mcontext.regs->nip;
+  *sp = ucontext->uc_mcontext.regs->gpr[PT_R1];
+  // The powerpc{,64}-linux ABIs do not specify r31 as the frame
+  // pointer, but GCC always uses r31 when we need a frame pointer.
+  *bp = ucontext->uc_mcontext.regs->gpr[PT_R31];
 #elif defined(__sparc__)
   ucontext_t *ucontext = (ucontext_t*)context;
   uptr *stk_ptr;
@@ -232,6 +240,10 @@
 }
 #endif
 
+void *AsanDlSymNext(const char *sym) {
+  return dlsym(RTLD_NEXT, sym);
+}
+
 }  // namespace __asan
 
 #endif  // SANITIZER_FREEBSD || SANITIZER_LINUX
diff --git a/lib/asan/asan_mac.cc b/lib/asan/asan_mac.cc
index 9f2fabd..ed7d9ce 100644
--- a/lib/asan/asan_mac.cc
+++ b/lib/asan/asan_mac.cc
@@ -374,32 +374,44 @@
     work(); \
   }
 
+// Forces the compiler to generate a frame pointer in the function.
+#define ENABLE_FRAME_POINTER                                       \
+  do {                                                             \
+    volatile uptr enable_fp;                                       \
+    enable_fp = GET_CURRENT_FRAME();                               \
+  } while (0)
+
 INTERCEPTOR(void, dispatch_async,
             dispatch_queue_t dq, void(^work)(void)) {
+  ENABLE_FRAME_POINTER;
   GET_ASAN_BLOCK(work);
   REAL(dispatch_async)(dq, asan_block);
 }
 
 INTERCEPTOR(void, dispatch_group_async,
             dispatch_group_t dg, dispatch_queue_t dq, void(^work)(void)) {
+  ENABLE_FRAME_POINTER;
   GET_ASAN_BLOCK(work);
   REAL(dispatch_group_async)(dg, dq, asan_block);
 }
 
 INTERCEPTOR(void, dispatch_after,
             dispatch_time_t when, dispatch_queue_t queue, void(^work)(void)) {
+  ENABLE_FRAME_POINTER;
   GET_ASAN_BLOCK(work);
   REAL(dispatch_after)(when, queue, asan_block);
 }
 
 INTERCEPTOR(void, dispatch_source_set_cancel_handler,
             dispatch_source_t ds, void(^work)(void)) {
+  ENABLE_FRAME_POINTER;
   GET_ASAN_BLOCK(work);
   REAL(dispatch_source_set_cancel_handler)(ds, asan_block);
 }
 
 INTERCEPTOR(void, dispatch_source_set_event_handler,
             dispatch_source_t ds, void(^work)(void)) {
+  ENABLE_FRAME_POINTER;
   GET_ASAN_BLOCK(work);
   REAL(dispatch_source_set_event_handler)(ds, asan_block);
 }
diff --git a/lib/asan/asan_malloc_linux.cc b/lib/asan/asan_malloc_linux.cc
index 52fd4b1..077a50c 100644
--- a/lib/asan/asan_malloc_linux.cc
+++ b/lib/asan/asan_malloc_linux.cc
@@ -29,34 +29,38 @@
 DECLARE_REAL_AND_INTERCEPTOR(void*, calloc, uptr nmemb, uptr size)
 DECLARE_REAL_AND_INTERCEPTOR(void*, realloc, void *ptr, uptr size)
 DECLARE_REAL_AND_INTERCEPTOR(void*, memalign, uptr boundary, uptr size)
+DECLARE_REAL_AND_INTERCEPTOR(uptr, malloc_usable_size, void *mem)
 
 struct MallocDebug {
-  void* (*malloc)(uptr bytes);
-  void  (*free)(void* mem);
-  void* (*calloc)(uptr n_elements, uptr elem_size);
-  void* (*realloc)(void* oldMem, uptr bytes);
-  void* (*memalign)(uptr alignment, uptr bytes);
+  void *(*malloc)(uptr bytes);
+  void (*free)(void *mem);
+  void *(*calloc)(uptr n_elements, uptr elem_size);
+  void *(*realloc)(void *oldMem, uptr bytes);
+  void *(*memalign)(uptr alignment, uptr bytes);
+  uptr (*malloc_usable_size)(void *mem);
 };
 
-const MallocDebug asan_malloc_dispatch ALIGNED(32) = {
-  WRAP(malloc), WRAP(free), WRAP(calloc), WRAP(realloc), WRAP(memalign)
-};
-
-extern "C" const MallocDebug* __libc_malloc_dispatch;
+ALIGNED(32) const MallocDebug asan_malloc_dispatch = {
+    WRAP(malloc),  WRAP(free),     WRAP(calloc),
+    WRAP(realloc), WRAP(memalign), WRAP(malloc_usable_size)};
 
 namespace __asan {
 void ReplaceSystemMalloc() {
-  __libc_malloc_dispatch = &asan_malloc_dispatch;
+  const MallocDebug** __libc_malloc_dispatch_p;
+  __libc_malloc_dispatch_p =
+      (const MallocDebug **)AsanDlSymNext("__libc_malloc_dispatch");
+  if (__libc_malloc_dispatch_p)
+    *__libc_malloc_dispatch_p = &asan_malloc_dispatch;
 }
 }  // namespace __asan
 
-#else  // ANDROID
+#else  // SANITIZER_ANDROID
 
 namespace __asan {
 void ReplaceSystemMalloc() {
 }
 }  // namespace __asan
-#endif  // ANDROID
+#endif  // SANITIZER_ANDROID
 
 // ---------------------- Replacement functions ---------------- {{{1
 using namespace __asan;  // NOLINT
@@ -102,6 +106,11 @@
   return asan_memalign(boundary, size, &stack, FROM_MALLOC);
 }
 
+INTERCEPTOR(void*, aligned_alloc, uptr boundary, uptr size) {
+  GET_STACK_TRACE_MALLOC;
+  return asan_memalign(boundary, size, &stack, FROM_MALLOC);
+}
+
 INTERCEPTOR(void*, __libc_memalign, uptr boundary, uptr size) {
   GET_STACK_TRACE_MALLOC;
   void *res = asan_memalign(boundary, size, &stack, FROM_MALLOC);
diff --git a/lib/asan/asan_mapping.h b/lib/asan/asan_mapping.h
index 1a5c185..8acc99a 100644
--- a/lib/asan/asan_mapping.h
+++ b/lib/asan/asan_mapping.h
@@ -87,6 +87,7 @@
 static const u64 kDefaultShort64bitShadowOffset = 0x7FFF8000;  // < 2G.
 static const u64 kAArch64_ShadowOffset64 = 1ULL << 36;
 static const u64 kMIPS32_ShadowOffset32 = 0x0aaa8000;
+static const u64 kPPC64_ShadowOffset64 = 1ULL << 41;
 static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30;  // 0x40000000
 static const u64 kFreeBSD_ShadowOffset64 = 1ULL << 46;  // 0x400000000000
 
@@ -109,6 +110,8 @@
 # else
 #  if defined(__aarch64__)
 #    define SHADOW_OFFSET kAArch64_ShadowOffset64
+#  elif defined(__powerpc64__)
+#    define SHADOW_OFFSET kPPC64_ShadowOffset64
 #  elif SANITIZER_FREEBSD
 #    define SHADOW_OFFSET kFreeBSD_ShadowOffset64
 #  elif SANITIZER_MAC
diff --git a/lib/asan/asan_report.cc b/lib/asan/asan_report.cc
index f8b8431..e13d59f 100644
--- a/lib/asan/asan_report.cc
+++ b/lib/asan/asan_report.cc
@@ -212,6 +212,26 @@
               (char *)g.beg);
 }
 
+static const char *GlobalFilename(const __asan_global &g) {
+  const char *res = g.module_name;
+  // Prefer the filename from source location, if it is available.
+  if (g.location)
+    res = g.location->filename;
+  CHECK(res);
+  return res;
+}
+
+static void PrintGlobalLocation(InternalScopedString *str,
+                                const __asan_global &g) {
+  str->append("%s", GlobalFilename(g));
+  if (!g.location)
+    return;
+  if (g.location->line_no)
+    str->append(":%d", g.location->line_no);
+  if (g.location->column_no)
+    str->append(":%d", g.location->column_no);
+}
+
 bool DescribeAddressRelativeToGlobal(uptr addr, uptr size,
                                      const __asan_global &g) {
   static const uptr kMinimalDistanceFromAnotherGlobal = 64;
@@ -232,8 +252,10 @@
     // Can it happen?
     str.append("%p is located %zd bytes inside", (void *)addr, addr - g.beg);
   }
-  str.append(" of global variable '%s' from '%s' (0x%zx) of size %zu\n",
-             MaybeDemangleGlobalName(g.name), g.module_name, g.beg, g.size);
+  str.append(" of global variable '%s' defined in '",
+             MaybeDemangleGlobalName(g.name));
+  PrintGlobalLocation(&str, g);
+  str.append("' (0x%zx) of size %zu\n", g.beg, g.size);
   str.append("%s", d.EndLocation());
   PrintGlobalNameIfASCII(&str, g);
   Printf("%s", str.data());
@@ -676,17 +698,17 @@
   ReportErrorSummary("bad-malloc_usable_size", stack);
 }
 
-void ReportAsanGetAllocatedSizeNotOwned(uptr addr, StackTrace *stack) {
+void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr, StackTrace *stack) {
   ScopedInErrorReport in_report;
   Decorator d;
   Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: attempting to call "
-             "__asan_get_allocated_size() for pointer which is "
+             "__sanitizer_get_allocated_size() for pointer which is "
              "not owned: %p\n", addr);
   Printf("%s", d.EndWarning());
   stack->Print();
   DescribeHeapAddress(addr, 1);
-  ReportErrorSummary("bad-__asan_get_allocated_size", stack);
+  ReportErrorSummary("bad-__sanitizer_get_allocated_size", stack);
 }
 
 void ReportStringFunctionMemoryRangesOverlap(
@@ -735,17 +757,31 @@
   ReportErrorSummary("bad-__sanitizer_annotate_contiguous_container", stack);
 }
 
-void ReportODRViolation(const __asan_global *g1, const __asan_global *g2) {
+void ReportODRViolation(const __asan_global *g1, u32 stack_id1,
+                        const __asan_global *g2, u32 stack_id2) {
   ScopedInErrorReport in_report;
   Decorator d;
   Printf("%s", d.Warning());
   Report("ERROR: AddressSanitizer: odr-violation (%p):\n", g1->beg);
   Printf("%s", d.EndWarning());
-  Printf("  [1] size=%zd %s %s\n", g1->size, g1->name, g1->module_name);
-  Printf("  [2] size=%zd %s %s\n", g2->size, g2->name, g2->module_name);
+  InternalScopedString g1_loc(256), g2_loc(256);
+  PrintGlobalLocation(&g1_loc, *g1);
+  PrintGlobalLocation(&g2_loc, *g2);
+  Printf("  [1] size=%zd %s %s\n", g1->size, g1->name, g1_loc.data());
+  Printf("  [2] size=%zd %s %s\n", g2->size, g2->name, g2_loc.data());
+  if (stack_id1 && stack_id2) {
+    Printf("These globals were registered at these points:\n");
+    Printf("  [1]:\n");
+    uptr stack_size;
+    const uptr *stack_trace = StackDepotGet(stack_id1, &stack_size);
+    StackTrace::PrintStack(stack_trace, stack_size);
+    Printf("  [2]:\n");
+    stack_trace = StackDepotGet(stack_id2, &stack_size);
+    StackTrace::PrintStack(stack_trace, stack_size);
+  }
   Report("HINT: if you don't care about these warnings you may set "
          "ASAN_OPTIONS=detect_odr_violation=0\n");
-  ReportErrorSummary("odr-violation", g1->module_name, 0, g1->name);
+  ReportErrorSummary("odr-violation", g1_loc.data(), 0, g1->name);
 }
 
 // ----------------------- CheckForInvalidPointerPair ----------- {{{1
diff --git a/lib/asan/asan_report.h b/lib/asan/asan_report.h
index 1cf7c59..374ebfb 100644
--- a/lib/asan/asan_report.h
+++ b/lib/asan/asan_report.h
@@ -43,8 +43,8 @@
                                       AllocType dealloc_type);
 void NORETURN ReportMallocUsableSizeNotOwned(uptr addr,
                                              StackTrace *stack);
-void NORETURN ReportAsanGetAllocatedSizeNotOwned(uptr addr,
-                                                 StackTrace *stack);
+void NORETURN
+ReportSanitizerGetAllocatedSizeNotOwned(uptr addr, StackTrace *stack);
 void NORETURN ReportStringFunctionMemoryRangesOverlap(
     const char *function, const char *offset1, uptr length1,
     const char *offset2, uptr length2, StackTrace *stack);
@@ -55,7 +55,8 @@
                                              uptr new_mid, StackTrace *stack);
 
 void NORETURN
-ReportODRViolation(const __asan_global *g1, const __asan_global *g2);
+ReportODRViolation(const __asan_global *g1, u32 stack_id1,
+                   const __asan_global *g2, u32 stack_id2);
 
 // Mac-specific errors and warnings.
 void WarnMacFreeUnallocated(
diff --git a/lib/asan/asan_rtl.cc b/lib/asan/asan_rtl.cc
index 3e21c87..2fe91f2 100644
--- a/lib/asan/asan_rtl.cc
+++ b/lib/asan/asan_rtl.cc
@@ -279,6 +279,7 @@
   f->start_deactivated = false;
   f->detect_invalid_pointer_pairs = 0;
   f->detect_container_overflow = true;
+  f->detect_odr_violation = 2;
 
   // Override from compile definition.
   ParseFlagsFromString(f, MaybeUseAsanDefaultOptionsCompileDefinition());
diff --git a/lib/asan/asan_stats.cc b/lib/asan/asan_stats.cc
index 5af37e2..0837bc8 100644
--- a/lib/asan/asan_stats.cc
+++ b/lib/asan/asan_stats.cc
@@ -15,6 +15,7 @@
 #include "asan_internal.h"
 #include "asan_stats.h"
 #include "asan_thread.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 
@@ -139,7 +140,7 @@
 // ---------------------- Interface ---------------- {{{1
 using namespace __asan;  // NOLINT
 
-uptr __asan_get_current_allocated_bytes() {
+uptr __sanitizer_get_current_allocated_bytes() {
   AsanStats stats;
   GetAccumulatedStats(&stats);
   uptr malloced = stats.malloced;
@@ -148,14 +149,20 @@
   // way we update accumulated stats.
   return (malloced > freed) ? malloced - freed : 1;
 }
+uptr __asan_get_current_allocated_bytes() {
+  return __sanitizer_get_current_allocated_bytes();
+}
 
-uptr __asan_get_heap_size() {
+uptr __sanitizer_get_heap_size() {
   AsanStats stats;
   GetAccumulatedStats(&stats);
   return stats.mmaped - stats.munmaped;
 }
+uptr __asan_get_heap_size() {
+  return __sanitizer_get_heap_size();
+}
 
-uptr __asan_get_free_bytes() {
+uptr __sanitizer_get_free_bytes() {
   AsanStats stats;
   GetAccumulatedStats(&stats);
   uptr total_free = stats.mmaped
@@ -168,10 +175,16 @@
   // way we update accumulated stats.
   return (total_free > total_used) ? total_free - total_used : 1;
 }
+uptr __asan_get_free_bytes() {
+  return __sanitizer_get_free_bytes();
+}
 
-uptr __asan_get_unmapped_bytes() {
+uptr __sanitizer_get_unmapped_bytes() {
   return 0;
 }
+uptr __asan_get_unmapped_bytes() {
+  return __sanitizer_get_unmapped_bytes();
+}
 
 void __asan_print_accumulated_stats() {
   PrintAccumulatedStats();
diff --git a/lib/asan/asan_thread.cc b/lib/asan/asan_thread.cc
index 1d45573b..48ff401 100644
--- a/lib/asan/asan_thread.cc
+++ b/lib/asan/asan_thread.cc
@@ -141,7 +141,10 @@
 }
 
 void AsanThread::Init() {
+  fake_stack_ = 0;  // Will be initialized lazily if needed.
+  CHECK_EQ(this->stack_size(), 0U);
   SetThreadStackAndTls();
+  CHECK_GT(this->stack_size(), 0U);
   CHECK(AddrIsInMem(stack_bottom_));
   CHECK(AddrIsInMem(stack_top_ - 1));
   ClearShadowForThreadStackAndTLS();
@@ -149,7 +152,6 @@
   VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
           (void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
           &local);
-  fake_stack_ = 0;  // Will be initialized lazily if needed.
   AsanPlatformThreadInit();
 }
 
diff --git a/lib/asan/scripts/asan_device_setup b/lib/asan/scripts/asan_device_setup
index db6346b..261e2c6 100755
--- a/lib/asan/scripts/asan_device_setup
+++ b/lib/asan/scripts/asan_device_setup
@@ -97,8 +97,8 @@
     ASAN_RT_PATH="$HERE"
 elif [[ $(basename "$HERE") == "bin" ]]; then
     # We could be in the toolchain's base directory.
-    # Consider ../lib and ../lib/clang/$VERSION/lib/linux.
-    P=$(ls "$HERE"/../lib/"$ASAN_RT" "$HERE"/../lib/clang/*/lib/linux/"$ASAN_RT" 2>/dev/null | sort | tail -1)
+    # Consider ../lib, ../lib/asan and ../lib/clang/$VERSION/lib/linux.
+    P=$(ls "$HERE"/../lib/"$ASAN_RT" "$HERE"/../lib/asan/"$ASAN_RT" "$HERE"/../lib/clang/*/lib/linux/"$ASAN_RT" 2>/dev/null | sort | tail -1)
     if [[ -n "$P" ]]; then
         ASAN_RT_PATH="$(dirname "$P")"
     fi
diff --git a/lib/asan/scripts/gen_asm_instrumentation.sh b/lib/asan/scripts/gen_asm_instrumentation.sh
deleted file mode 100755
index e8bee80..0000000
--- a/lib/asan/scripts/gen_asm_instrumentation.sh
+++ /dev/null
@@ -1,266 +0,0 @@
-#!/bin/bash
-
-#===- lib/asan/scripts/gen_asm_instrumentation.sh -------------------------===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-# Emit x86 instrumentation functions for asan.
-#
-#===-----------------------------------------------------------------------===#
-
-check() {
-  test $# -eq 2 || (echo "Incorrent number of arguments: $#" 1>&2 && exit 1)
-  case "$1" in
-    store) ;;
-     load) ;;
-        *) echo "Incorrect first argument: $1" 1>&2 && exit 1 ;;
-  esac
-  case "$2" in
-    [0-9]*) ;;
-         *) echo "Incorrect second argument: $2" 1>&2 && exit 1 ;;
-  esac
-}
-
-func_name() {
-  check $1 $2
-  echo "__sanitizer_sanitize_$1$2"
-}
-
-func_label() {
-  check $1 $2
-  echo ".sanitize_$1$2_done"
-}
-
-func_report() {
-  check $1 $2
-  echo "__asan_report_$1$2"
-}
-
-emit_call_report() {
-cat <<EOF
-  cld
-  emms
-  call $(func_report $1 $2)@PLT
-EOF
-}
-
-emit_stack_align() {
-cat <<EOF
-  subq \$8, %rsp
-  andq \$-16, %rsp
-EOF
-}
-
-cat <<EOF
-// This file was generated by $(basename $0). Please, do not edit
-// manually.
-EOF
-
-echo "#ifdef __linux__"
-echo ".section .text"
-
-echo "#if defined(__x86_64__) || defined(__i386__)"
-for as in 1 2 4 8 16
-do
-  for at in store load
-  do
-    echo ".globl $(func_report $at $as)"
-  done
-done
-echo "#endif //  defined(__x86_64__) || defined(__i386__)"
-
-echo "#if defined(__i386__)"
-
-# Functions for i386 1-, 2- and 4-byte accesses.
-for as in 1 2 4
-do
-  for at in store load
-  do
-cat <<EOF
-// Sanitize $as-byte $at. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl $(func_name $at $as)
-.type $(func_name $at $as), @function
-$(func_name $at $as):
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushl %edx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl \$0x3, %ecx
-  movb 0x20000000(%ecx), %cl
-  testb %cl, %cl
-  je $(func_label $at $as)
-  movl %eax, %edx
-  andl \$0x7, %edx
-EOF
-
-    case $as in
-    1) ;;
-    2) echo '  incl %edx' ;;
-    4) echo '  addl $0x3, %edx' ;;
-    *) echo "Incorrect access size: $as" 1>&2; exit 1 ;;
-    esac
-
-cat <<EOF
-  movsbl %cl, %ecx
-  cmpl %ecx, %edx
-  jl $(func_label $at $as)
-  pushl %eax
-$(emit_call_report $at $as)
-$(func_label $at $as):
-  popfl
-  popl %edx
-  popl %ecx
-  popl %eax
-  leave
-  ret
-EOF
-  done
-done
-
-# Functions for i386 8- and 16-byte accesses.
-for as in 8 16
-do
-  for at in store load
-  do
-cat <<EOF
-// Sanitize $as-byte $at. Takes one 4-byte address as an argument on
-// stack, nothing is returned.
-.globl $(func_name $at $as)
-.type $(func_name $at $as), @function
-$(func_name $at $as):
-  pushl %ebp
-  movl %esp, %ebp
-  pushl %eax
-  pushl %ecx
-  pushfl
-  movl 8(%ebp), %eax
-  movl %eax, %ecx
-  shrl \$0x3, %ecx
-EOF
-
-    case ${as} in
-      8) echo '  cmpb $0x0, 0x20000000(%ecx)' ;;
-     16) echo '  cmpw $0x0, 0x20000000(%ecx)' ;;
-      *) echo "Incorrect access size: ${as}" 1>&2; exit 1 ;;
-    esac
-
-cat <<EOF
-  je $(func_label $at $as)
-  pushl %eax
-$(emit_call_report $at $as)
-$(func_label $at $as):
-  popfl
-  popl %ecx
-  popl %eax
-  leave
-  ret
-EOF
-  done
-done
-
-echo "#endif // defined(__i386__)"
-
-echo "#if defined(__x86_64__)"
-
-# Functions for x86-64 1-, 2- and 4-byte accesses.
-for as in 1 2 4
-do
-  for at in store load
-  do
-cat <<EOF
-// Sanitize $as-byte $at. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl $(func_name $at $as)
-.type $(func_name $at $as), @function
-$(func_name $at $as):
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushq %rcx
-  pushfq
-  movq %rdi, %rax
-  shrq \$0x3, %rax
-  movb 0x7fff8000(%rax), %al
-  test %al, %al
-  je $(func_label $at $as)
-  movl %edi, %ecx
-  andl \$0x7, %ecx
-EOF
-
-    case ${as} in
-    1) ;;
-    2) echo '  incl %ecx' ;;
-    4) echo '  addl $0x3, %ecx' ;;
-    *) echo "Incorrect access size: ${as}" 1>&2; exit 1 ;;
-    esac
-
-cat <<EOF
-  movsbl %al, %eax
-  cmpl %eax, %ecx
-  jl $(func_label $at $as)
-$(emit_stack_align)
-$(emit_call_report $at $as)
-$(func_label $at $as):
-  popfq
-  popq %rcx
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-EOF
-  done
-done
-
-# Functions for x86-64 8- and 16-byte accesses.
-for as in 8 16
-do
-  for at in store load
-  do
-cat <<EOF
-// Sanitize $as-byte $at. Takes one 8-byte address as an argument in %rdi,
-// nothing is returned.
-.globl $(func_name $at $as)
-.type $(func_name $at $as), @function
-$(func_name $at $as):
-  leaq -128(%rsp), %rsp
-  pushq %rax
-  pushfq
-  movq %rdi, %rax
-  shrq \$0x3, %rax
-EOF
-
-    case ${as} in
-      8) echo '  cmpb $0x0, 0x7fff8000(%rax)' ;;
-     16) echo '  cmpw $0x0, 0x7fff8000(%rax)' ;;
-      *) echo "Incorrect access size: ${as}" 1>&2; exit 1 ;;
-    esac
-
-cat <<EOF
-  je $(func_label $at $as)
-$(emit_stack_align)
-$(emit_call_report $at $as)
-$(func_label $at $as):
-  popfq
-  popq %rax
-  leaq 128(%rsp), %rsp
-  ret
-EOF
-  done
-done
-echo "#endif // defined(__x86_64__)"
-
-cat <<EOF
-/* We do not need executable stack. */
-#if defined(__arm__)
-  .section .note.GNU-stack,"",%progbits
-#else
-  .section .note.GNU-stack,"",@progbits
-#endif // defined(__arm__)
-#endif // __linux__
-EOF
diff --git a/lib/asan/tests/CMakeLists.txt b/lib/asan/tests/CMakeLists.txt
index 470543f..c6a7041 100644
--- a/lib/asan/tests/CMakeLists.txt
+++ b/lib/asan/tests/CMakeLists.txt
@@ -21,32 +21,23 @@
   asan_test_utils.h)
 
 set(ASAN_UNITTEST_COMMON_CFLAGS
+  ${COMPILER_RT_TEST_CFLAGS}
   ${COMPILER_RT_GTEST_CFLAGS}
   -I${COMPILER_RT_SOURCE_DIR}/include
   -I${COMPILER_RT_SOURCE_DIR}/lib
   -I${COMPILER_RT_SOURCE_DIR}/lib/asan
   -I${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common/tests
+  -fno-rtti
+  -O2
   -Wno-format
-  -Werror=sign-compare
-  -O2)
-append_if(COMPILER_RT_HAS_G_FLAG -g ASAN_UNITTEST_COMMON_CFLAGS)
-append_if(COMPILER_RT_HAS_Zi_FLAG -Zi ASAN_UNITTEST_COMMON_CFLAGS)
+  -Werror=sign-compare)
 append_if(COMPILER_RT_HAS_WNO_VARIADIC_MACROS_FLAG -Wno-variadic-macros ASAN_UNITTEST_COMMON_CFLAGS)
 
-if(MSVC)
-  # MSVC system headers and gtest use a lot of deprecated stuff.
-  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
-       -Wno-deprecated-declarations)
-
-  # clang-cl doesn't support exceptions yet.
-  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
-       /fallback
-       -D_HAS_EXCEPTIONS=0)
-
-  # We should teach clang-cl to understand more pragmas.
-  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS
-       -Wno-unknown-pragmas
-       -Wno-undefined-inline)
+# -gline-tables-only must be enough for ASan, so use it if possible.
+if(COMPILER_RT_TEST_COMPILER_ID MATCHES "Clang")
+  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -gline-tables-only)
+else()
+  list(APPEND ASAN_UNITTEST_COMMON_CFLAGS -g)
 endif()
 
 # Use -D instead of definitions to please custom compile command.
@@ -95,8 +86,8 @@
 set(ASAN_UNITTEST_NOINST_LINKFLAGS ${ASAN_UNITTEST_COMMON_LINKFLAGS})
 append_if(COMPILER_RT_HAS_LIBM -lm ASAN_UNITTEST_NOINST_LINKFLAGS)
 append_if(COMPILER_RT_HAS_LIBDL -ldl ASAN_UNITTEST_NOINST_LINKFLAGS)
-append_if(COMPILER_RT_HAS_LIBPTHREAD -lpthread ASAN_UNITTEST_NOINST_LINKFLAGS)
-append_if(COMPILER_RT_HAS_LIBPTHREAD -lpthread
+append_if(COMPILER_RT_HAS_LIBPTHREAD -pthread ASAN_UNITTEST_NOINST_LINKFLAGS)
+append_if(COMPILER_RT_HAS_LIBPTHREAD -pthread
           ASAN_DYNAMIC_UNITTEST_INSTRUMENTED_LINKFLAGS)
 
 # Compile source for the given architecture, using compiler
diff --git a/lib/asan/tests/asan_asm_test.cc b/lib/asan/tests/asan_asm_test.cc
index 709ff5d..86f806d 100644
--- a/lib/asan/tests/asan_asm_test.cc
+++ b/lib/asan/tests/asan_asm_test.cc
@@ -12,8 +12,6 @@
 //===----------------------------------------------------------------------===//
 #include "asan_test_utils.h"
 
-// Tests for __sanitizer_sanitize_(store|load)N functions in compiler-rt.
-
 #if defined(__linux__)
 
 #if defined(__x86_64__) || (defined(__i386__) && defined(__SSE2__))
@@ -36,13 +34,10 @@
 #define DECLARE_ASM_WRITE(Type, Size, Mov, Reg)        \
 template<> void asm_write<Type>(Type *ptr, Type val) { \
   __asm__(                                             \
-    "leaq (%[ptr]), %%rdi  \n\t"                       \
-    "movabsq $__sanitizer_sanitize_store" Size ", %%r11  \n\t" \
-    "call *%%r11  \n\t"                                 \
     Mov " %[val], (%[ptr])  \n\t"                      \
     :                                                  \
     : [ptr] "r" (ptr), [val] Reg (val)                 \
-    : "memory", "rdi", "r11"                           \
+    : "memory"                                         \
   );                                                   \
 }
 
@@ -50,13 +45,10 @@
 template<> Type asm_read<Type>(Type *ptr) {        \
   Type res;                                        \
   __asm__(                                         \
-    "leaq (%[ptr]), %%rdi  \n\t"                   \
-    "movabsq $__sanitizer_sanitize_load" Size ", %%r11  \n\t" \
-    "callq *%%r11  \n\t"                           \
     Mov " (%[ptr]), %[res]  \n\t"                  \
     : [res] Reg (res)                              \
     : [ptr] "r" (ptr)                              \
-    : "memory", "rdi", "r11"                       \
+    : "memory"                                     \
   );                                               \
   return res;                                      \
 }
@@ -75,14 +67,10 @@
 #define DECLARE_ASM_WRITE(Type, Size, Mov, Reg)        \
 template<> void asm_write<Type>(Type *ptr, Type val) { \
   __asm__(                                             \
-    "leal (%[ptr]), %%eax  \n\t"                       \
-    "pushl %%eax  \n\t"                                \
-    "call __sanitizer_sanitize_store" Size "  \n\t"    \
-    "popl %%eax  \n\t"                                 \
     Mov " %[val], (%[ptr])  \n\t"                      \
     :                                                  \
     : [ptr] "r" (ptr), [val] Reg (val)                 \
-    : "memory", "eax", "esp"                           \
+    : "memory"                                         \
   );                                                   \
 }
 
@@ -90,52 +78,14 @@
 template<> Type asm_read<Type>(Type *ptr) {        \
   Type res;                                        \
   __asm__(                                         \
-    "leal (%[ptr]), %%eax  \n\t"                   \
-    "pushl %%eax  \n\t"                            \
-    "call __sanitizer_sanitize_load" Size "  \n\t" \
-    "popl %%eax  \n\t"                             \
     Mov " (%[ptr]), %[res]  \n\t"                  \
     : [res] Reg (res)                              \
     : [ptr] "r" (ptr)                              \
-    : "memory", "eax", "esp"                       \
+    : "memory"                                     \
   );                                               \
   return res;                                      \
 }
 
-template<> void asm_write<U8>(U8 *ptr, U8 val) {
-  __asm__(
-    "leal (%[ptr]), %%eax  \n\t"
-    "pushl %%eax  \n\t"
-    "call __sanitizer_sanitize_store8  \n\t"
-    "popl %%eax  \n\t"
-    "movl (%[val]), %%eax  \n\t"
-    "movl %%eax, (%[ptr])  \n\t"
-    "movl 0x4(%[val]), %%eax  \n\t"
-    "movl %%eax, 0x4(%[ptr])  \n\t"
-    :
-    : [ptr] "r" (ptr), [val] "r" (&val)
-    : "memory", "eax", "esp"
-  );
-}
-
-template<> U8 asm_read(U8 *ptr) {
-  U8 res;
-  __asm__(
-    "leal (%[ptr]), %%eax  \n\t"
-    "pushl %%eax  \n\t"
-    "call __sanitizer_sanitize_load8  \n\t"
-    "popl  %%eax  \n\t"
-    "movl (%[ptr]), %%eax  \n\t"
-    "movl %%eax, (%[res])  \n\t"
-    "movl 0x4(%[ptr]), %%eax  \n\t"
-    "movl %%eax, 0x4(%[res])  \n\t"
-    :
-    : [ptr] "r" (ptr), [res] "r" (&res)
-    : "memory", "eax", "esp"
-  );
-  return res;
-}
-
 } // End of anonymous namespace
 
 #endif  // defined(__i386__) && defined(__SSE2__)
@@ -220,13 +170,17 @@
   TestAsmWrite<U1>("WRITE of size 1");
   TestAsmWrite<U2>("WRITE of size 2");
   TestAsmWrite<U4>("WRITE of size 4");
+#if defined(__x86_64__)
   TestAsmWrite<U8>("WRITE of size 8");
+#endif // defined(__x86_64__)
   TestAsmWrite<__m128i>("WRITE of size 16");
 
   TestAsmRead<U1>("READ of size 1");
   TestAsmRead<U2>("READ of size 2");
   TestAsmRead<U4>("READ of size 4");
+#if defined(__x86_64__)
   TestAsmRead<U8>("READ of size 8");
+#endif // defined(__x86_64__)
   TestAsmRead<__m128i>("READ of size 16");
 }
 
diff --git a/lib/asan/tests/asan_interface_test.cc b/lib/asan/tests/asan_interface_test.cc
index 725711c..50fdf11 100644
--- a/lib/asan/tests/asan_interface_test.cc
+++ b/lib/asan/tests/asan_interface_test.cc
@@ -11,18 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 #include "asan_test_utils.h"
-#include "sanitizer/asan_interface.h"
+#include <sanitizer/allocator_interface.h>
+#include <sanitizer/asan_interface.h>
 
 TEST(AddressSanitizerInterface, GetEstimatedAllocatedSize) {
-  EXPECT_EQ(0U, __asan_get_estimated_allocated_size(0));
+  EXPECT_EQ(0U, __sanitizer_get_estimated_allocated_size(0));
   const size_t sizes[] = { 1, 30, 1<<30 };
   for (size_t i = 0; i < 3; i++) {
-    EXPECT_EQ(sizes[i], __asan_get_estimated_allocated_size(sizes[i]));
+    EXPECT_EQ(sizes[i], __sanitizer_get_estimated_allocated_size(sizes[i]));
   }
 }
 
 static const char* kGetAllocatedSizeErrorMsg =
-  "attempting to call __asan_get_allocated_size";
+  "attempting to call __sanitizer_get_allocated_size";
 
 TEST(AddressSanitizerInterface, GetAllocatedSizeAndOwnershipTest) {
   const size_t kArraySize = 100;
@@ -31,38 +32,41 @@
 
   // Allocated memory is owned by allocator. Allocated size should be
   // equal to requested size.
-  EXPECT_EQ(true, __asan_get_ownership(array));
-  EXPECT_EQ(kArraySize, __asan_get_allocated_size(array));
-  EXPECT_EQ(true, __asan_get_ownership(int_ptr));
-  EXPECT_EQ(sizeof(int), __asan_get_allocated_size(int_ptr));
+  EXPECT_EQ(true, __sanitizer_get_ownership(array));
+  EXPECT_EQ(kArraySize, __sanitizer_get_allocated_size(array));
+  EXPECT_EQ(true, __sanitizer_get_ownership(int_ptr));
+  EXPECT_EQ(sizeof(int), __sanitizer_get_allocated_size(int_ptr));
 
   // We cannot call GetAllocatedSize from the memory we didn't map,
   // and from the interior pointers (not returned by previous malloc).
   void *wild_addr = (void*)0x1;
-  EXPECT_FALSE(__asan_get_ownership(wild_addr));
-  EXPECT_DEATH(__asan_get_allocated_size(wild_addr), kGetAllocatedSizeErrorMsg);
-  EXPECT_FALSE(__asan_get_ownership(array + kArraySize / 2));
-  EXPECT_DEATH(__asan_get_allocated_size(array + kArraySize / 2),
+  EXPECT_FALSE(__sanitizer_get_ownership(wild_addr));
+  EXPECT_DEATH(__sanitizer_get_allocated_size(wild_addr),
+               kGetAllocatedSizeErrorMsg);
+  EXPECT_FALSE(__sanitizer_get_ownership(array + kArraySize / 2));
+  EXPECT_DEATH(__sanitizer_get_allocated_size(array + kArraySize / 2),
                kGetAllocatedSizeErrorMsg);
 
-  // NULL is not owned, but is a valid argument for __asan_get_allocated_size().
-  EXPECT_FALSE(__asan_get_ownership(NULL));
-  EXPECT_EQ(0U, __asan_get_allocated_size(NULL));
+  // NULL is not owned, but is a valid argument for
+  // __sanitizer_get_allocated_size().
+  EXPECT_FALSE(__sanitizer_get_ownership(NULL));
+  EXPECT_EQ(0U, __sanitizer_get_allocated_size(NULL));
 
   // When memory is freed, it's not owned, and call to GetAllocatedSize
   // is forbidden.
   free(array);
-  EXPECT_FALSE(__asan_get_ownership(array));
-  EXPECT_DEATH(__asan_get_allocated_size(array), kGetAllocatedSizeErrorMsg);
+  EXPECT_FALSE(__sanitizer_get_ownership(array));
+  EXPECT_DEATH(__sanitizer_get_allocated_size(array),
+               kGetAllocatedSizeErrorMsg);
   delete int_ptr;
 
   void *zero_alloc = Ident(malloc(0));
   if (zero_alloc != 0) {
     // If malloc(0) is not null, this pointer is owned and should have valid
     // allocated size.
-    EXPECT_TRUE(__asan_get_ownership(zero_alloc));
+    EXPECT_TRUE(__sanitizer_get_ownership(zero_alloc));
     // Allocated size is 0 or 1 depending on the allocator used.
-    EXPECT_LT(__asan_get_allocated_size(zero_alloc), 2U);
+    EXPECT_LT(__sanitizer_get_allocated_size(zero_alloc), 2U);
   }
   free(zero_alloc);
 }
@@ -71,14 +75,14 @@
   size_t before_malloc, after_malloc, after_free;
   char *array;
   const size_t kMallocSize = 100;
-  before_malloc = __asan_get_current_allocated_bytes();
+  before_malloc = __sanitizer_get_current_allocated_bytes();
 
   array = Ident((char*)malloc(kMallocSize));
-  after_malloc = __asan_get_current_allocated_bytes();
+  after_malloc = __sanitizer_get_current_allocated_bytes();
   EXPECT_EQ(before_malloc + kMallocSize, after_malloc);
 
   free(array);
-  after_free = __asan_get_current_allocated_bytes();
+  after_free = __sanitizer_get_current_allocated_bytes();
   EXPECT_EQ(before_malloc, after_free);
 }
 
@@ -88,11 +92,11 @@
   // otherwise it will be stuck in quarantine instead of being unmaped.
   static const size_t kLargeMallocSize = (1 << 28) + 1;  // 256M
   free(Ident(malloc(kLargeMallocSize)));  // Drain quarantine.
-  size_t old_heap_size = __asan_get_heap_size();
+  size_t old_heap_size = __sanitizer_get_heap_size();
   for (int i = 0; i < 3; i++) {
     // fprintf(stderr, "allocating %zu bytes:\n", kLargeMallocSize);
     free(Ident(malloc(kLargeMallocSize)));
-    EXPECT_EQ(old_heap_size, __asan_get_heap_size());
+    EXPECT_EQ(old_heap_size, __sanitizer_get_heap_size());
   }
 }
 
@@ -116,7 +120,7 @@
 TEST(AddressSanitizerInterface, ManyThreadsWithStatsStressTest) {
   size_t before_test, after_test, i;
   pthread_t threads[kManyThreadsNumThreads];
-  before_test = __asan_get_current_allocated_bytes();
+  before_test = __sanitizer_get_current_allocated_bytes();
   for (i = 0; i < kManyThreadsNumThreads; i++) {
     PTHREAD_CREATE(&threads[i], 0,
                    (void* (*)(void *x))ManyThreadsWithStatsWorker, (void*)i);
@@ -124,7 +128,7 @@
   for (i = 0; i < kManyThreadsNumThreads; i++) {
     PTHREAD_JOIN(threads[i], 0);
   }
-  after_test = __asan_get_current_allocated_bytes();
+  after_test = __sanitizer_get_current_allocated_bytes();
   // ASan stats also reflect memory usage of internal ASan RTL structs,
   // so we can't check for equality here.
   EXPECT_LT(after_test, before_test + (1UL<<20));
@@ -417,11 +421,11 @@
     sizes.push_back(size);
   }
   for (size_t i = 0; i < 4000000; i++) {
-    EXPECT_FALSE(__asan_get_ownership(&pointers));
-    EXPECT_FALSE(__asan_get_ownership((void*)0x1234));
+    EXPECT_FALSE(__sanitizer_get_ownership(&pointers));
+    EXPECT_FALSE(__sanitizer_get_ownership((void*)0x1234));
     size_t idx = i % kNumMallocs;
-    EXPECT_TRUE(__asan_get_ownership(pointers[idx]));
-    EXPECT_EQ(sizes[idx], __asan_get_allocated_size(pointers[idx]));
+    EXPECT_TRUE(__sanitizer_get_ownership(pointers[idx]));
+    EXPECT_EQ(sizes[idx], __sanitizer_get_allocated_size(pointers[idx]));
   }
   for (size_t i = 0, n = pointers.size(); i < n; i++)
     free(pointers[i]);
diff --git a/lib/asan/tests/asan_noinst_test.cc b/lib/asan/tests/asan_noinst_test.cc
index 8d2a6ac..7fae462 100644
--- a/lib/asan/tests/asan_noinst_test.cc
+++ b/lib/asan/tests/asan_noinst_test.cc
@@ -16,6 +16,7 @@
 #include "asan_internal.h"
 #include "asan_mapping.h"
 #include "asan_test_utils.h"
+#include <sanitizer/allocator_interface.h>
 
 #include <assert.h>
 #include <stdio.h>
@@ -175,12 +176,12 @@
 // destroyed.
 TEST(AddressSanitizer, ThreadedQuarantineTest) {
   const int n_threads = 3000;
-  size_t mmaped1 = __asan_get_heap_size();
+  size_t mmaped1 = __sanitizer_get_heap_size();
   for (int i = 0; i < n_threads; i++) {
     pthread_t t;
     PTHREAD_CREATE(&t, NULL, ThreadedQuarantineTestWorker, 0);
     PTHREAD_JOIN(t, 0);
-    size_t mmaped2 = __asan_get_heap_size();
+    size_t mmaped2 = __sanitizer_get_heap_size();
     EXPECT_LT(mmaped2 - mmaped1, 320U * (1 << 20));
   }
 }
diff --git a/lib/builtins/Makefile.mk b/lib/builtins/Makefile.mk
index 3143d91..4dbadd0 100644
--- a/lib/builtins/Makefile.mk
+++ b/lib/builtins/Makefile.mk
@@ -11,7 +11,7 @@
 SubDirs :=
 
 # Add arch specific optimized implementations.
-SubDirs += i386 ppc x86_64 arm
+SubDirs += i386 ppc x86_64 arm armv6m
 
 # Define the variables for this specific directory.
 Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
diff --git a/lib/builtins/README.txt b/lib/builtins/README.txt
new file mode 100644
index 0000000..1c08e74
--- /dev/null
+++ b/lib/builtins/README.txt
@@ -0,0 +1,343 @@
+Compiler-RT
+================================
+
+This directory and its subdirectories contain source code for the compiler
+support routines.
+
+Compiler-RT is open source software. You may freely distribute it under the
+terms of the license agreement found in LICENSE.txt.
+
+================================
+
+This is a replacement library for libgcc.  Each function is contained
+in its own file.  Each function has a corresponding unit test under
+test/Unit.
+
+A rudimentary script to test each file is in the file called
+test/Unit/test.
+
+Here is the specification for this library:
+
+http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc
+
+Here is a synopsis of the contents of this library:
+
+typedef      int si_int;
+typedef unsigned su_int;
+
+typedef          long long di_int;
+typedef unsigned long long du_int;
+
+// Integral bit manipulation
+
+di_int __ashldi3(di_int a, si_int b);      // a << b
+ti_int __ashlti3(ti_int a, si_int b);      // a << b
+
+di_int __ashrdi3(di_int a, si_int b);      // a >> b  arithmetic (sign fill)
+ti_int __ashrti3(ti_int a, si_int b);      // a >> b  arithmetic (sign fill)
+di_int __lshrdi3(di_int a, si_int b);      // a >> b  logical    (zero fill)
+ti_int __lshrti3(ti_int a, si_int b);      // a >> b  logical    (zero fill)
+
+si_int __clzsi2(si_int a);  // count leading zeros
+si_int __clzdi2(di_int a);  // count leading zeros
+si_int __clzti2(ti_int a);  // count leading zeros
+si_int __ctzsi2(si_int a);  // count trailing zeros
+si_int __ctzdi2(di_int a);  // count trailing zeros
+si_int __ctzti2(ti_int a);  // count trailing zeros
+
+si_int __ffsdi2(di_int a);  // find least significant 1 bit
+si_int __ffsti2(ti_int a);  // find least significant 1 bit
+
+si_int __paritysi2(si_int a);  // bit parity
+si_int __paritydi2(di_int a);  // bit parity
+si_int __parityti2(ti_int a);  // bit parity
+
+si_int __popcountsi2(si_int a);  // bit population
+si_int __popcountdi2(di_int a);  // bit population
+si_int __popcountti2(ti_int a);  // bit population
+
+uint32_t __bswapsi2(uint32_t a);   // a byteswapped, arm only
+uint64_t __bswapdi2(uint64_t a);   // a byteswapped, arm only
+
+// Integral arithmetic
+
+di_int __negdi2    (di_int a);                         // -a
+ti_int __negti2    (ti_int a);                         // -a
+di_int __muldi3    (di_int a, di_int b);               // a * b
+ti_int __multi3    (ti_int a, ti_int b);               // a * b
+si_int __divsi3    (si_int a, si_int b);               // a / b   signed
+di_int __divdi3    (di_int a, di_int b);               // a / b   signed
+ti_int __divti3    (ti_int a, ti_int b);               // a / b   signed
+su_int __udivsi3   (su_int n, su_int d);               // n / d   unsigned
+du_int __udivdi3   (du_int a, du_int b);               // a / b   unsigned
+tu_int __udivti3   (tu_int a, tu_int b);               // a / b   unsigned
+si_int __modsi3    (si_int a, si_int b);               // a % b   signed
+di_int __moddi3    (di_int a, di_int b);               // a % b   signed
+ti_int __modti3    (ti_int a, ti_int b);               // a % b   signed
+su_int __umodsi3   (su_int a, su_int b);               // a % b   unsigned
+du_int __umoddi3   (du_int a, du_int b);               // a % b   unsigned
+tu_int __umodti3   (tu_int a, tu_int b);               // a % b   unsigned
+du_int __udivmoddi4(du_int a, du_int b, du_int* rem);  // a / b, *rem = a % b  unsigned
+tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);  // a / b, *rem = a % b  unsigned
+su_int __udivmodsi4(su_int a, su_int b, su_int* rem);  // a / b, *rem = a % b  unsigned
+si_int __divmodsi4(si_int a, si_int b, si_int* rem);   // a / b, *rem = a % b  signed
+
+
+
+//  Integral arithmetic with trapping overflow
+
+si_int __absvsi2(si_int a);           // abs(a)
+di_int __absvdi2(di_int a);           // abs(a)
+ti_int __absvti2(ti_int a);           // abs(a)
+
+si_int __negvsi2(si_int a);           // -a
+di_int __negvdi2(di_int a);           // -a
+ti_int __negvti2(ti_int a);           // -a
+
+si_int __addvsi3(si_int a, si_int b);  // a + b
+di_int __addvdi3(di_int a, di_int b);  // a + b
+ti_int __addvti3(ti_int a, ti_int b);  // a + b
+
+si_int __subvsi3(si_int a, si_int b);  // a - b
+di_int __subvdi3(di_int a, di_int b);  // a - b
+ti_int __subvti3(ti_int a, ti_int b);  // a - b
+
+si_int __mulvsi3(si_int a, si_int b);  // a * b
+di_int __mulvdi3(di_int a, di_int b);  // a * b
+ti_int __mulvti3(ti_int a, ti_int b);  // a * b
+
+
+// Integral arithmetic which returns if overflow
+
+si_int __mulosi4(si_int a, si_int b, int* overflow);  // a * b, overflow set to one if result not in signed range
+di_int __mulodi4(di_int a, di_int b, int* overflow);  // a * b, overflow set to one if result not in signed range
+ti_int __muloti4(ti_int a, ti_int b, int* overflow);  // a * b, overflow set to
+                                                      // one if result not in signed range
+
+
+//  Integral comparison: a  < b -> 0
+//                       a == b -> 1
+//                       a  > b -> 2
+
+si_int __cmpdi2 (di_int a, di_int b);
+si_int __cmpti2 (ti_int a, ti_int b);
+si_int __ucmpdi2(du_int a, du_int b);
+si_int __ucmpti2(tu_int a, tu_int b);
+
+//  Integral / floating point conversion
+
+di_int __fixsfdi(      float a);
+di_int __fixdfdi(     double a);
+di_int __fixxfdi(long double a);
+
+ti_int __fixsfti(      float a);
+ti_int __fixdfti(     double a);
+ti_int __fixxfti(long double a);
+uint64_t __fixtfdi(long double input);  // ppc only, doesn't match documentation
+
+su_int __fixunssfsi(      float a);
+su_int __fixunsdfsi(     double a);
+su_int __fixunsxfsi(long double a);
+
+du_int __fixunssfdi(      float a);
+du_int __fixunsdfdi(     double a);
+du_int __fixunsxfdi(long double a);
+
+tu_int __fixunssfti(      float a);
+tu_int __fixunsdfti(     double a);
+tu_int __fixunsxfti(long double a);
+uint64_t __fixunstfdi(long double input);  // ppc only
+
+float       __floatdisf(di_int a);
+double      __floatdidf(di_int a);
+long double __floatdixf(di_int a);
+long double __floatditf(int64_t a);        // ppc only
+
+float       __floattisf(ti_int a);
+double      __floattidf(ti_int a);
+long double __floattixf(ti_int a);
+
+float       __floatundisf(du_int a);
+double      __floatundidf(du_int a);
+long double __floatundixf(du_int a);
+long double __floatunditf(uint64_t a);     // ppc only
+
+float       __floatuntisf(tu_int a);
+double      __floatuntidf(tu_int a);
+long double __floatuntixf(tu_int a);
+
+//  Floating point raised to integer power
+
+float       __powisf2(      float a, si_int b);  // a ^ b
+double      __powidf2(     double a, si_int b);  // a ^ b
+long double __powixf2(long double a, si_int b);  // a ^ b
+long double __powitf2(long double a, si_int b);  // ppc only, a ^ b
+
+//  Complex arithmetic
+
+//  (a + ib) * (c + id)
+
+      float _Complex __mulsc3( float a,  float b,  float c,  float d);
+     double _Complex __muldc3(double a, double b, double c, double d);
+long double _Complex __mulxc3(long double a, long double b,
+                              long double c, long double d);
+long double _Complex __multc3(long double a, long double b,
+                              long double c, long double d); // ppc only
+
+//  (a + ib) / (c + id)
+
+      float _Complex __divsc3( float a,  float b,  float c,  float d);
+     double _Complex __divdc3(double a, double b, double c, double d);
+long double _Complex __divxc3(long double a, long double b,
+                              long double c, long double d);
+long double _Complex __divtc3(long double a, long double b,
+                              long double c, long double d);  // ppc only
+
+
+//         Runtime support
+
+// __clear_cache() is used to tell process that new instructions have been
+// written to an address range.  Necessary on processors that do not have
+// a unified instruction and data cache.
+void __clear_cache(void* start, void* end);
+
+// __enable_execute_stack() is used with nested functions when a trampoline
+// function is written onto the stack and that page range needs to be made
+// executable.
+void __enable_execute_stack(void* addr);
+
+// __gcc_personality_v0() is normally only called by the system unwinder.
+// C code (as opposed to C++) normally does not need a personality function
+// because there are no catch clauses or destructors to be run.  But there
+// is a C language extension __attribute__((cleanup(func))) which marks local
+// variables as needing the cleanup function "func" to be run when the
+// variable goes out of scope.  That includes when an exception is thrown,
+// so a personality handler is needed.  
+_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions,
+         uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject,
+         _Unwind_Context_t context);
+
+// for use with some implementations of assert() in <assert.h>
+void __eprintf(const char* format, const char* assertion_expression,
+				const char* line, const char* file);
+				
+
+
+//   Power PC specific functions
+
+// There is no C interface to the saveFP/restFP functions.  They are helper
+// functions called by the prolog and epilog of functions that need to save
+// a number of non-volatile floating point registers.
+saveFP
+restFP
+
+// PowerPC has a standard template for trampoline functions.  This function
+// generates a custom trampoline function with the specific realFunc
+// and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, 
+                                const void* realFunc, void* localsPtr);
+
+// adds two 128-bit double-double precision values ( x + y )
+long double __gcc_qadd(long double x, long double y);  
+
+// subtracts two 128-bit double-double precision values ( x - y )
+long double __gcc_qsub(long double x, long double y); 
+
+// multiplies two 128-bit double-double precision values ( x * y )
+long double __gcc_qmul(long double x, long double y);  
+
+// divides two 128-bit double-double precision values ( x / y )
+long double __gcc_qdiv(long double a, long double b);  
+
+
+//    ARM specific functions
+
+// There is no C interface to the switch* functions.  These helper functions
+// are only needed by Thumb1 code for efficient switch table generation.
+switch16
+switch32
+switch8
+switchu8
+
+// There is no C interface to the *_vfp_d8_d15_regs functions.  They are
+// called in the prolog and epilog of Thumb1 functions.  When the C++ ABI uses
+// SJLJ for exceptions, each function with a catch clause or destructors needs
+// to save and restore all registers in its prolog and epilog.  But there is
+// no way to access vector and high float registers from thumb1 code, so the 
+// compiler must add call outs to these helper functions in the prolog and 
+// epilog.
+restore_vfp_d8_d15_regs
+save_vfp_d8_d15_regs
+
+
+// Note: long ago ARM processors did not have floating point hardware support.
+// Floating point was done in software and floating point parameters were 
+// passed in integer registers.  When hardware support was added for floating
+// point, new *vfp functions were added to do the same operations but with 
+// floating point parameters in floating point registers.
+
+// Undocumented functions
+
+float  __addsf3vfp(float a, float b);   // Appears to return a + b
+double __adddf3vfp(double a, double b); // Appears to return a + b
+float  __divsf3vfp(float a, float b);   // Appears to return a / b
+double __divdf3vfp(double a, double b); // Appears to return a / b
+int    __eqsf2vfp(float a, float b);    // Appears to return  one
+                                        //     iff a == b and neither is NaN.
+int    __eqdf2vfp(double a, double b);  // Appears to return  one
+                                        //     iff a == b and neither is NaN.
+double __extendsfdf2vfp(float a);       // Appears to convert from
+                                        //     float to double.
+int    __fixdfsivfp(double a);          // Appears to convert from
+                                        //     double to int.
+int    __fixsfsivfp(float a);           // Appears to convert from
+                                        //     float to int.
+unsigned int __fixunssfsivfp(float a);  // Appears to convert from
+                                        //     float to unsigned int.
+unsigned int __fixunsdfsivfp(double a); // Appears to convert from
+                                        //     double to unsigned int.
+double __floatsidfvfp(int a);           // Appears to convert from
+                                        //     int to double.
+float __floatsisfvfp(int a);            // Appears to convert from
+                                        //     int to float.
+double __floatunssidfvfp(unsigned int a); // Appears to convert from
+                                        //     unsigned int to double.
+float __floatunssisfvfp(unsigned int a); // Appears to convert from
+                                        //     unsigned int to float.
+int __gedf2vfp(double a, double b);     // Appears to return __gedf2
+                                        //     (a >= b)
+int __gesf2vfp(float a, float b);       // Appears to return __gesf2
+                                        //     (a >= b)
+int __gtdf2vfp(double a, double b);     // Appears to return __gtdf2
+                                        //     (a > b)
+int __gtsf2vfp(float a, float b);       // Appears to return __gtsf2
+                                        //     (a > b)
+int __ledf2vfp(double a, double b);     // Appears to return __ledf2
+                                        //     (a <= b)
+int __lesf2vfp(float a, float b);       // Appears to return __lesf2
+                                        //     (a <= b)
+int __ltdf2vfp(double a, double b);     // Appears to return __ltdf2
+                                        //     (a < b)
+int __ltsf2vfp(float a, float b);       // Appears to return __ltsf2
+                                        //     (a < b)
+double __muldf3vfp(double a, double b); // Appears to return a * b
+float __mulsf3vfp(float a, float b);    // Appears to return a * b
+int __nedf2vfp(double a, double b);     // Appears to return __nedf2
+                                        //     (a != b)
+double __negdf2vfp(double a);           // Appears to return -a
+float __negsf2vfp(float a);             // Appears to return -a
+float __negsf2vfp(float a);             // Appears to return -a
+double __subdf3vfp(double a, double b); // Appears to return a - b
+float __subsf3vfp(float a, float b);    // Appears to return a - b
+float __truncdfsf2vfp(double a);        // Appears to convert from
+                                        //     double to float.
+int __unorddf2vfp(double a, double b);  // Appears to return __unorddf2
+int __unordsf2vfp(float a, float b);    // Appears to return __unordsf2
+
+
+Preconditions are listed for each function at the definition when there are any.
+Any preconditions reflect the specification at
+http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc.
+
+Assumptions are listed in "int_lib.h", and in individual files.  Where possible
+assumptions are checked at compile time.
diff --git a/lib/builtins/arm/bswapdi2.S b/lib/builtins/arm/bswapdi2.S
index 14070fd..c2e2ce9 100644
--- a/lib/builtins/arm/bswapdi2.S
+++ b/lib/builtins/arm/bswapdi2.S
@@ -9,12 +9,18 @@
 
 #include "../assembly.h"
 
+	.syntax unified
+	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
 //
 // extern uint64_t __bswapdi2(uint64_t);
 //
 // Reverse all the bytes in a 64-bit integer.
 //
-.p2align 2
+	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__bswapdi2)
 #if __ARM_ARCH < 6
     // before armv6 does not have "rev" instruction
diff --git a/lib/builtins/arm/bswapsi2.S b/lib/builtins/arm/bswapsi2.S
index 0fa2d98..ad09655 100644
--- a/lib/builtins/arm/bswapsi2.S
+++ b/lib/builtins/arm/bswapsi2.S
@@ -9,12 +9,18 @@
 
 #include "../assembly.h"
 
+	.syntax unified
+	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
 //
 // extern uint32_t __bswapsi2(uint32_t);
 //
 // Reverse all the bytes in a 32-bit integer.
 //
-.p2align 2
+	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__bswapsi2)
 #if __ARM_ARCH < 6
     // before armv6 does not have "rev" instruction
diff --git a/lib/builtins/arm/clzdi2.S b/lib/builtins/arm/clzdi2.S
index 841ba7b..bcea485 100644
--- a/lib/builtins/arm/clzdi2.S
+++ b/lib/builtins/arm/clzdi2.S
@@ -14,8 +14,12 @@
 #include "../assembly.h"
 
 	.syntax unified
-
 	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
+
 	.p2align	2
 DEFINE_COMPILERRT_FUNCTION(__clzdi2)
 #ifdef __ARM_FEATURE_CLZ
diff --git a/lib/builtins/arm/clzsi2.S b/lib/builtins/arm/clzsi2.S
index de53f4f..f0240b0 100644
--- a/lib/builtins/arm/clzsi2.S
+++ b/lib/builtins/arm/clzsi2.S
@@ -14,8 +14,11 @@
 #include "../assembly.h"
 
 	.syntax unified
-
 	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
 	.p2align	2
 DEFINE_COMPILERRT_FUNCTION(__clzsi2)
 #ifdef __ARM_FEATURE_CLZ
diff --git a/lib/builtins/arm/divmodsi4.S b/lib/builtins/arm/divmodsi4.S
index ff37d9f..91bb2a5 100644
--- a/lib/builtins/arm/divmodsi4.S
+++ b/lib/builtins/arm/divmodsi4.S
@@ -21,8 +21,13 @@
 #define CLEAR_FRAME_AND_RETURN \
     pop    {r4-r7, pc}
 
-.syntax unified
-.p2align 3
+	.syntax unified
+	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
+	.p2align 3
 DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
diff --git a/lib/builtins/arm/divsi3.S b/lib/builtins/arm/divsi3.S
index 08f3aba..db47165 100644
--- a/lib/builtins/arm/divsi3.S
+++ b/lib/builtins/arm/divsi3.S
@@ -20,8 +20,13 @@
 #define CLEAR_FRAME_AND_RETURN \
     pop    {r4, r7, pc}
 
-.syntax unified
-.p2align 3
+	.syntax unified
+	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
+	.p2align 3
 // Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
 DEFINE_COMPILERRT_FUNCTION(__divsi3)
diff --git a/lib/builtins/arm/modsi3.S b/lib/builtins/arm/modsi3.S
index b7933ea..7ed305e 100644
--- a/lib/builtins/arm/modsi3.S
+++ b/lib/builtins/arm/modsi3.S
@@ -20,8 +20,13 @@
 #define CLEAR_FRAME_AND_RETURN \
     pop    {r4, r7, pc}
 
-.syntax unified
-.p2align 3
+	.syntax unified
+	.text
+#if __ARM_ARCH_ISA_THUMB == 2
+	.thumb
+#endif
+
+	.p2align 3
 DEFINE_COMPILERRT_FUNCTION(__modsi3)
 #if __ARM_ARCH_EXT_IDIV__
 	tst     r1, r1
diff --git a/lib/builtins/arm/udivmodsi4.S b/lib/builtins/arm/udivmodsi4.S
index bb5d29c..ddc8752 100644
--- a/lib/builtins/arm/udivmodsi4.S
+++ b/lib/builtins/arm/udivmodsi4.S
@@ -15,8 +15,8 @@
 #include "../assembly.h"
 
 	.syntax unified
-
 	.text
+
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
 #if __ARM_ARCH_EXT_IDIV__
diff --git a/lib/builtins/arm/udivsi3.S b/lib/builtins/arm/udivsi3.S
index 11c1c09..8fb1dca 100644
--- a/lib/builtins/arm/udivsi3.S
+++ b/lib/builtins/arm/udivsi3.S
@@ -15,8 +15,8 @@
 #include "../assembly.h"
 
 	.syntax unified
-
 	.text
+
 	.p2align 2
 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
diff --git a/lib/builtins/arm/umodsi3.S b/lib/builtins/arm/umodsi3.S
index a03afef..164646b 100644
--- a/lib/builtins/arm/umodsi3.S
+++ b/lib/builtins/arm/umodsi3.S
@@ -15,8 +15,8 @@
 #include "../assembly.h"
 
 	.syntax unified
-
 	.text
+
 	.p2align 2
 DEFINE_COMPILERRT_FUNCTION(__umodsi3)
 #if __ARM_ARCH_EXT_IDIV__
diff --git a/lib/builtins/armv6m/Makefile.mk b/lib/builtins/armv6m/Makefile.mk
new file mode 100644
index 0000000..f3c1807
--- /dev/null
+++ b/lib/builtins/armv6m/Makefile.mk
@@ -0,0 +1,20 @@
+#===- lib/builtins/armv6m/Makefile.mk ----------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+ModuleName := builtins
+SubDirs := 
+OnlyArchs := armv6m
+
+AsmSources := $(foreach file,$(wildcard $(Dir)/*.S),$(notdir $(file)))
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.S=%.o)
+Implementation := Optimized
+
+# FIXME: use automatic dependencies?
+Dependencies := $(wildcard lib/*.h $(Dir)/*.h)
diff --git a/lib/builtins/assembly.h b/lib/builtins/assembly.h
index 78efe3a..d415a5f 100644
--- a/lib/builtins/assembly.h
+++ b/lib/builtins/assembly.h
@@ -42,7 +42,7 @@
 #define LOCAL_LABEL(name) .L ## name
 #define SYMBOL_IS_FUNC(name)                                                   \
   .def name SEPARATOR                                                          \
-    .scl 3 SEPARATOR                                                           \
+    .scl 2 SEPARATOR                                                           \
     .type 32 SEPARATOR                                                         \
   .endef
 #define FILE_LEVEL_DIRECTIVE
diff --git a/lib/builtins/divtf3.c b/lib/builtins/divtf3.c
new file mode 100644
index 0000000..e81dab8
--- /dev/null
+++ b/lib/builtins/divtf3.c
@@ -0,0 +1,203 @@
+//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+// For simplicity, this implementation currently flushes denormals to zero.
+// It should be a fairly straightforward exercise to implement gradual
+// underflow with correct rounding.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) {
+
+    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+    const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit;
+
+    rep_t aSignificand = toRep(a) & significandMask;
+    rep_t bSignificand = toRep(b) & significandMask;
+    int scale = 0;
+
+    // Detect if a or b is zero, denormal, infinity, or NaN.
+    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+
+        const rep_t aAbs = toRep(a) & absMask;
+        const rep_t bAbs = toRep(b) & absMask;
+
+        // NaN / anything = qNaN
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        // anything / NaN = qNaN
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+
+        if (aAbs == infRep) {
+            // infinity / infinity = NaN
+            if (bAbs == infRep) return fromRep(qnanRep);
+            // infinity / anything else = +/- infinity
+            else return fromRep(aAbs | quotientSign);
+        }
+
+        // anything else / infinity = +/- 0
+        if (bAbs == infRep) return fromRep(quotientSign);
+
+        if (!aAbs) {
+            // zero / zero = NaN
+            if (!bAbs) return fromRep(qnanRep);
+            // zero / anything else = +/- zero
+            else return fromRep(quotientSign);
+        }
+        // anything else / zero = +/- infinity
+        if (!bAbs) return fromRep(infRep | quotientSign);
+
+        // one or both of a or b is denormal, the other (if applicable) is a
+        // normal number.  Renormalize one or both of a and b, and set scale to
+        // include the necessary exponent adjustment.
+        if (aAbs < implicitBit) scale += normalize(&aSignificand);
+        if (bAbs < implicitBit) scale -= normalize(&bSignificand);
+    }
+
+    // Or in the implicit significand bit.  (If we fell through from the
+    // denormal path it was already set by normalize( ), but setting it twice
+    // won't hurt anything.)
+    aSignificand |= implicitBit;
+    bSignificand |= implicitBit;
+    int quotientExponent = aExponent - bExponent + scale;
+
+    // Align the significand of b as a Q63 fixed-point number in the range
+    // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax
+    // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2.  This
+    // is accurate to about 3.5 binary digits.
+    const uint64_t q63b = bSignificand >> 49;
+    uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b;
+    // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2)
+
+    // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+    //
+    //     x1 = x0 * (2 - x0 * b)
+    //
+    // This doubles the number of correct binary digits in the approximation
+    // with each iteration.
+    uint64_t correction64;
+    correction64 = -((rep_t)recip64 * q63b >> 64);
+    recip64 = (rep_t)recip64 * correction64 >> 63;
+    correction64 = -((rep_t)recip64 * q63b >> 64);
+    recip64 = (rep_t)recip64 * correction64 >> 63;
+    correction64 = -((rep_t)recip64 * q63b >> 64);
+    recip64 = (rep_t)recip64 * correction64 >> 63;
+    correction64 = -((rep_t)recip64 * q63b >> 64);
+    recip64 = (rep_t)recip64 * correction64 >> 63;
+    correction64 = -((rep_t)recip64 * q63b >> 64);
+    recip64 = (rep_t)recip64 * correction64 >> 63;
+
+    // recip64 might have overflowed to exactly zero in the preceding
+    // computation if the high word of b is exactly 1.0.  This would sabotage
+    // the full-width final stage of the computation that follows, so we adjust
+    // recip64 downward by one bit.
+    recip64--;
+
+    // We need to perform one more iteration to get us to 112 binary digits;
+    // The last iteration needs to happen with extra precision.
+    const uint64_t q127blo = bSignificand << 15;
+    rep_t correction, reciprocal;
+
+    // NOTE: This operation is equivalent to __multi3, which is not implemented
+    //       on some architectures
+    rep_t r64q63, r64q127, r64cH, r64cL, dummy;
+    wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63);
+    wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127);
+
+    correction = -(r64q63 + (r64q127 >> 64));
+
+    uint64_t cHi = correction >> 64;
+    uint64_t cLo = correction;
+
+    wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH);
+    wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL);
+
+    reciprocal = r64cH + (r64cL >> 64);
+
+    // We already adjusted the 64-bit estimate, now we need to adjust the final
+    // 128-bit reciprocal estimate downward to ensure that it is strictly smaller
+    // than the infinitely precise exact reciprocal.  Because the computation
+    // of the Newton-Raphson step is truncating at every step, this adjustment
+    // is small; most of the work is already done.
+    reciprocal -= 2;
+
+    // The numerical reciprocal is accurate to within 2^-112, lies in the
+    // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
+    // of b.  Multiplying a by this reciprocal thus gives a numerical q = a/b
+    // in Q127 with the following properties:
+    //
+    //    1. q < a/b
+    //    2. q is in the interval [0.5, 2.0)
+    //    3. the error in q is bounded away from 2^-113 (actually, we have a
+    //       couple of bits to spare, but this is all we need).
+
+    // We need a 128 x 128 multiply high to compute q, which isn't a basic
+    // operation in C, so we need to be a little bit fussy.
+    rep_t quotient, quotientLo;
+    wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo);
+
+    // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+    // In either case, we are going to compute a residual of the form
+    //
+    //     r = a - q*b
+    //
+    // We know from the construction of q that r satisfies:
+    //
+    //     0 <= r < ulp(q)*b
+    //
+    // if r is greater than 1/2 ulp(q)*b, then q rounds up.  Otherwise, we
+    // already have the correct result.  The exact halfway case cannot occur.
+    // We also take this time to right shift quotient if it falls in the [1,2)
+    // range and adjust the exponent accordingly.
+    rep_t residual;
+    rep_t qb;
+
+    if (quotient < (implicitBit << 1)) {
+        wideMultiply(quotient, bSignificand, &dummy, &qb);
+        residual = (aSignificand << 113) - qb;
+        quotientExponent--;
+    } else {
+        quotient >>= 1;
+        wideMultiply(quotient, bSignificand, &dummy, &qb);
+        residual = (aSignificand << 112) - qb;
+    }
+
+    const int writtenExponent = quotientExponent + exponentBias;
+
+    if (writtenExponent >= maxExponent) {
+        // If we have overflowed the exponent, return infinity.
+        return fromRep(infRep | quotientSign);
+    }
+    else if (writtenExponent < 1) {
+        // Flush denormals to zero.  In the future, it would be nice to add
+        // code to round them correctly.
+        return fromRep(quotientSign);
+    }
+    else {
+        const bool round = (residual << 1) >= bSignificand;
+        // Clear the implicit bit
+        rep_t absResult = quotient & significandMask;
+        // Insert the exponent
+        absResult |= (rep_t)writtenExponent << significandBits;
+        // Round
+        absResult += round;
+        // Insert the sign and return
+        const long double result = fromRep(absResult | quotientSign);
+        return result;
+    }
+}
+
+#endif
diff --git a/lib/builtins/extenddftf2.c b/lib/builtins/extenddftf2.c
new file mode 100644
index 0000000..86dab8f
--- /dev/null
+++ b/lib/builtins/extenddftf2.c
@@ -0,0 +1,23 @@
+//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_DOUBLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI long double __extenddftf2(double a) {
+    return __extendXfYf2__(a);  // specialized by SRC_DOUBLE / DST_QUAD above
+}
+
+#endif
diff --git a/lib/builtins/extendsfdf2.c b/lib/builtins/extendsfdf2.c
index 9e4c77b..7a267c2 100644
--- a/lib/builtins/extendsfdf2.c
+++ b/lib/builtins/extendsfdf2.c
@@ -7,132 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a fairly generic conversion from a narrower to a wider
-// IEEE-754 floating-point type.  The constants and types defined following the
-// includes below parameterize the conversion.
-//
-// This routine can be trivially adapted to support conversions from 
-// half-precision or to quad-precision. It does not support types that don't
-// use the usual IEEE-754 interchange formats; specifically, some work would be
-// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
-// double-double format.
-//
-// Note please, however, that this implementation is only intended to support
-// *widening* operations; if you need to convert to a *narrower* floating-point
-// type (e.g. double -> float), then this routine will not do what you want it
-// to.
-//
-// It also requires that integer types at least as large as both formats
-// are available on the target platform; this may pose a problem when trying
-// to add support for quad on some 32-bit systems, for example.  You also may
-// run into trouble finding an appropriate CLZ function for wide source types;
-// you will likely need to roll your own on some platforms.
-//
-// Finally, the following assumptions are made:
-//
-// 1. floating-point types and integer types have the same endianness on the
-//    target platform
-//
-// 2. quiet NaNs, if supported, are indicated by the leading bit of the
-//    significand field being set
-//
-//===----------------------------------------------------------------------===//
 
-#include "int_lib.h"
-
-typedef float src_t;
-typedef uint32_t src_rep_t;
-#define SRC_REP_C UINT32_C
-static const int srcSigBits = 23;
-#define src_rep_t_clz __builtin_clz
-
-typedef double dst_t;
-typedef uint64_t dst_rep_t;
-#define DST_REP_C UINT64_C
-static const int dstSigBits = 52;
-
-// End of specialization parameters.  Two helper routines for conversion to and
-// from the representation of floating-point data as integer values follow.
-
-static inline src_rep_t srcToRep(src_t x) {
-    const union { src_t f; src_rep_t i; } rep = {.f = x};
-    return rep.i;
-}
-
-static inline dst_t dstFromRep(dst_rep_t x) {
-    const union { dst_t f; dst_rep_t i; } rep = {.i = x};
-    return rep.f;
-}
-
-// End helper routines.  Conversion implementation follows.
+#define SRC_SINGLE
+#define DST_DOUBLE
+#include "fp_extend_impl.inc"
 
 ARM_EABI_FNALIAS(f2d, extendsfdf2)
 
-COMPILER_RT_ABI dst_t
-__extendsfdf2(src_t a) {
-    
-    // Various constants whose values follow from the type parameters.
-    // Any reasonable optimizer will fold and propagate all of these.
-    const int srcBits = sizeof(src_t)*CHAR_BIT;
-    const int srcExpBits = srcBits - srcSigBits - 1;
-    const int srcInfExp = (1 << srcExpBits) - 1;
-    const int srcExpBias = srcInfExp >> 1;
-    
-    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
-    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
-    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
-    const src_rep_t srcAbsMask = srcSignMask - 1;
-    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
-    const src_rep_t srcNaNCode = srcQNaN - 1;
-    
-    const int dstBits = sizeof(dst_t)*CHAR_BIT;
-    const int dstExpBits = dstBits - dstSigBits - 1;
-    const int dstInfExp = (1 << dstExpBits) - 1;
-    const int dstExpBias = dstInfExp >> 1;
-    
-    const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
-    
-    // Break a into a sign and representation of the absolute value
-    const src_rep_t aRep = srcToRep(a);
-    const src_rep_t aAbs = aRep & srcAbsMask;
-    const src_rep_t sign = aRep & srcSignMask;
-    dst_rep_t absResult;
-    
-    if (aAbs - srcMinNormal < srcInfinity - srcMinNormal) {
-        // a is a normal number.
-        // Extend to the destination type by shifting the significand and
-        // exponent into the proper position and rebiasing the exponent.
-        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
-        absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
-    }
-    
-    else if (aAbs >= srcInfinity) {
-        // a is NaN or infinity.
-        // Conjure the result by beginning with infinity, then setting the qNaN
-        // bit (if needed) and right-aligning the rest of the trailing NaN
-        // payload field.
-        absResult = (dst_rep_t)dstInfExp << dstSigBits;
-        absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
-        absResult |= aAbs & srcNaNCode;
-    }
-    
-    else if (aAbs) {
-        // a is denormal.
-        // renormalize the significand and clear the leading bit, then insert
-        // the correct adjusted exponent in the destination type.
-        const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
-        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
-        absResult ^= dstMinNormal;
-        const int resultExponent = dstExpBias - srcExpBias - scale + 1;
-        absResult |= (dst_rep_t)resultExponent << dstSigBits;
-    }
-
-    else {
-        // a is zero.
-        absResult = 0;
-    }
-    
-    // Apply the signbit to (dst_t)abs(a).
-    const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
-    return dstFromRep(result);
+COMPILER_RT_ABI double __extendsfdf2(float a) {
+    return __extendXfYf2__(a);  // specialized by SRC_SINGLE / DST_DOUBLE above
 }
diff --git a/lib/builtins/extendsftf2.c b/lib/builtins/extendsftf2.c
new file mode 100644
index 0000000..2eeeba2
--- /dev/null
+++ b/lib/builtins/extendsftf2.c
@@ -0,0 +1,23 @@
+//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_SINGLE
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI long double __extendsftf2(float a) {
+    return __extendXfYf2__(a);  // specialized by SRC_SINGLE / DST_QUAD above
+}
+
+#endif
diff --git a/lib/builtins/fp_extend.h b/lib/builtins/fp_extend.h
new file mode 100644
index 0000000..fff676e
--- /dev/null
+++ b/lib/builtins/fp_extend.h
@@ -0,0 +1,76 @@
+//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_EXTEND_HEADER
+#define FP_EXTEND_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_SINGLE
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcSigBits = 23;
+#define src_rep_t_clz __builtin_clz
+
+#elif defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52;
+static inline int src_rep_t_clz(src_rep_t a) {  // leading zeros; a must be != 0
+#if defined __LP64__
+    return __builtin_clzl(a);  // long is 64 bits wide under LP64
+#else
+    if (a & SRC_REP_C(0xffffffff00000000))  // any bit set in the high word?
+        return __builtin_clz(a >> 32);
+    else
+        return 32 + __builtin_clz(a & SRC_REP_C(0xffffffff));
+#endif
+}
+
+#else
+#error Source should be single precision or double precision!
+#endif //end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+#elif defined DST_QUAD
+typedef long double dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstSigBits = 112;
+
+#else
+#error Destination should be double precision or quad precision!
+#endif //end destination precision
+
+// End of specialization parameters.  Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+    const union { src_t f; src_rep_t i; } rep = {.f = x};  // type-pun via union
+    return rep.i;  // raw bits of x as an unsigned integer
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+    const union { dst_t f; dst_rep_t i; } rep = {.i = x};  // type-pun via union
+    return rep.f;  // the bits of x reinterpreted as a dst_t value
+}
+// End helper routines.  Conversion implementation follows.
+
+#endif //FP_EXTEND_HEADER
diff --git a/lib/builtins/fp_extend_impl.inc b/lib/builtins/fp_extend_impl.inc
new file mode 100644
index 0000000..f6953ff
--- /dev/null
+++ b/lib/builtins/fp_extend_impl.inc
@@ -0,0 +1,106 @@
+//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a narrower to a wider
+// IEEE-754 floating-point type.  The constants and types defined following the
+// includes below parameterize the conversion.
+//
+// It does not support types that don't use the usual IEEE-754 interchange
+// formats; specifically, some work would be needed to adapt it to
+// (for example) the Intel 80-bit format or PowerPC double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *widening* operations; if you need to convert to a *narrower* floating-point
+// type (e.g. double -> float), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.  You also may
+// run into trouble finding an appropriate CLZ function for wide source types;
+// you will likely need to roll your own on some platforms.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+//    target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+//    significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_extend.h"
+
+static inline dst_t __extendXfYf2__(src_t a) {
+    // Widen a from src_t to dst_t; no rounding is performed.  Constants below
+    // follow from the type parameters; any reasonable optimizer folds them.
+    const int srcBits = sizeof(src_t)*CHAR_BIT;
+    const int srcExpBits = srcBits - srcSigBits - 1;
+    const int srcInfExp = (1 << srcExpBits) - 1;
+    const int srcExpBias = srcInfExp >> 1;
+
+    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+    const src_rep_t srcAbsMask = srcSignMask - 1;
+    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+    const src_rep_t srcNaNCode = srcQNaN - 1;
+
+    const int dstBits = sizeof(dst_t)*CHAR_BIT;
+    const int dstExpBits = dstBits - dstSigBits - 1;
+    const int dstInfExp = (1 << dstExpBits) - 1;
+    const int dstExpBias = dstInfExp >> 1;
+
+    const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
+
+    // Break a into a sign and representation of the absolute value
+    const src_rep_t aRep = srcToRep(a);
+    const src_rep_t aAbs = aRep & srcAbsMask;
+    const src_rep_t sign = aRep & srcSignMask;
+    dst_rep_t absResult;
+
+    if (aAbs - srcMinNormal < srcInfinity - srcMinNormal) {
+        // a is normal: unsigned wraparound excludes aAbs < srcMinNormal.
+        // Extend to the destination type by shifting the significand and
+        // exponent into the proper position and rebiasing the exponent.
+        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
+        absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
+    }
+
+    else if (aAbs >= srcInfinity) {
+        // a is NaN or infinity.
+        // Conjure the result by beginning with infinity, then setting the qNaN
+        // bit (if needed) and placing the rest of the NaN payload just below
+        // it, in the most significant bits of the destination significand.
+        absResult = (dst_rep_t)dstInfExp << dstSigBits;
+        absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
+        absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
+    }
+
+    else if (aAbs) {
+        // a is denormal.
+        // renormalize the significand and clear the leading bit, then insert
+        // the correct adjusted exponent in the destination type.
+        const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
+        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
+        absResult ^= dstMinNormal;
+        const int resultExponent = dstExpBias - srcExpBias - scale + 1;
+        absResult |= (dst_rep_t)resultExponent << dstSigBits;
+    }
+
+    else {
+        // a is zero.
+        absResult = 0;
+    }
+
+    // Apply the signbit to (dst_t)abs(a); the shift binds tighter than |.
+    const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
+    return dstFromRep(result);
+}
diff --git a/lib/builtins/fp_lib.h b/lib/builtins/fp_lib.h
index 7b90518..faebb99 100644
--- a/lib/builtins/fp_lib.h
+++ b/lib/builtins/fp_lib.h
@@ -26,6 +26,18 @@
 #include <limits.h>
 #include "int_lib.h"
 
+// x86_64 FreeBSD prior to v9.3 defines fixed-width types incorrectly in
+// 32-bit mode.
+#if defined(__FreeBSD__) && defined(__i386__)
+# include <sys/param.h>
+# if __FreeBSD_version < 903000  // v9.3
+#  define uint64_t unsigned long long
+#  define int64_t long long
+#  undef UINT64_C
+#  define UINT64_C(c) (c ## ULL)
+# endif
+#endif
+
 #if defined SINGLE_PRECISION
 
 typedef uint32_t rep_t;
diff --git a/lib/builtins/fp_trunc.h b/lib/builtins/fp_trunc.h
new file mode 100644
index 0000000..49a9aeb
--- /dev/null
+++ b/lib/builtins/fp_trunc.h
@@ -0,0 +1,64 @@
+//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Set source and destination precision setting
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FP_TRUNC_HEADER
+#define FP_TRUNC_HEADER
+
+#include "int_lib.h"
+
+#if defined SRC_DOUBLE
+typedef double src_t;
+typedef uint64_t src_rep_t;
+#define SRC_REP_C UINT64_C
+static const int srcSigBits = 52;
+
+#elif defined SRC_QUAD
+typedef long double src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+static const int srcSigBits = 112;
+
+#else
+#error Source should be double precision or quad precision!
+#endif //end source precision
+
+#if defined DST_DOUBLE
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+#elif defined DST_SINGLE
+typedef float dst_t;
+typedef uint32_t dst_rep_t;
+#define DST_REP_C UINT32_C
+static const int dstSigBits = 23;
+
+#else
+#error Destination should be single precision or double precision!
+#endif //end destination precision
+
+// End of specialization parameters.  Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+    const union { src_t f; src_rep_t i; } rep = {.f = x};  // type-pun via union
+    return rep.i;  // raw bits of x as an unsigned integer
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+    const union { dst_t f; dst_rep_t i; } rep = {.i = x};  // type-pun via union
+    return rep.f;  // the bits of x reinterpreted as a dst_t value
+}
+
+#endif // FP_TRUNC_HEADER
diff --git a/lib/builtins/fp_trunc_impl.inc b/lib/builtins/fp_trunc_impl.inc
new file mode 100644
index 0000000..21bffae
--- /dev/null
+++ b/lib/builtins/fp_trunc_impl.inc
@@ -0,0 +1,135 @@
+//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a fairly generic conversion from a wider to a narrower
+// IEEE-754 floating-point type in the default (round to nearest, ties to even)
+// rounding mode.  The constants and types defined following the includes below
+// parameterize the conversion.
+//
+// This routine can be trivially adapted to support conversions to
+// half-precision. It does not support types that don't use the usual
+// IEEE-754 interchange formats; specifically, some work would be needed to
+// adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *narrowing* operations; if you need to convert to a *wider* floating-point
+// type (e.g. float -> double), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+//    target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+//    significand field being set
+//
+//===----------------------------------------------------------------------===//
+
+#include "fp_trunc.h"
+
+static inline dst_t __truncXfYf2__(src_t a) {
+    // Narrow a to dst_t (round to nearest, ties to even).  The constants below
+    // follow from the type parameters; any reasonable optimizer folds them.
+    const int srcBits = sizeof(src_t)*CHAR_BIT;
+    const int srcExpBits = srcBits - srcSigBits - 1;
+    const int srcInfExp = (1 << srcExpBits) - 1;
+    const int srcExpBias = srcInfExp >> 1;
+
+    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+    const src_rep_t srcSignificandMask = srcMinNormal - 1;
+    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+    const src_rep_t srcAbsMask = srcSignMask - 1;
+    const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
+    const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
+    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+    const src_rep_t srcNaNCode = srcQNaN - 1;
+
+    const int dstBits = sizeof(dst_t)*CHAR_BIT;
+    const int dstExpBits = dstBits - dstSigBits - 1;
+    const int dstInfExp = (1 << dstExpBits) - 1;
+    const int dstExpBias = dstInfExp >> 1;
+
+    const int underflowExponent = srcExpBias + 1 - dstExpBias;
+    const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+    const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
+    const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
+
+    const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
+    const dst_rep_t dstNaNCode = dstQNaN - 1;
+
+    // Break a into a sign and representation of the absolute value
+    const src_rep_t aRep = srcToRep(a);
+    const src_rep_t aAbs = aRep & srcAbsMask;
+    const src_rep_t sign = aRep & srcSignMask;
+    dst_rep_t absResult;
+
+    if (aAbs - underflow < aAbs - overflow) {
+        // underflow <= aAbs < overflow (via unsigned wraparound): the exponent
+        // of a is within the range of normal numbers in the destination
+        // format.  Convert by right-shifting with rounding and rebiasing.
+        absResult = aAbs >> (srcSigBits - dstSigBits);
+        absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
+
+        const src_rep_t roundBits = aAbs & roundMask;
+        // Round to nearest
+        if (roundBits > halfway)
+            absResult++;
+        // Ties to even
+        else if (roundBits == halfway)
+            absResult += absResult & 1;
+    }
+    else if (aAbs > srcInfinity) {
+        // a is NaN.
+        // Conjure the result by beginning with infinity, setting the qNaN
+        // bit and inserting the (truncated) trailing NaN field.
+        absResult = (dst_rep_t)dstInfExp << dstSigBits;
+        absResult |= dstQNaN;
+        absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
+    }
+    else if (aAbs >= overflow) {
+        // a is infinite or too large for dst_t (== overflow included): infinity.
+        absResult = (dst_rep_t)dstInfExp << dstSigBits;
+    }
+    else {
+        // a underflows on conversion to the destination type or is an exact
+        // zero.  The result may be a denormal or zero.  Extract the exponent
+        // to get the shift amount for the denormalization.
+        const int aExp = aAbs >> srcSigBits;
+        const int shift = srcExpBias - dstExpBias - aExp + 1;
+
+        const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;
+
+        // Right shift by the denormalization amount with sticky.
+        if (shift > srcSigBits) {
+            absResult = 0;
+        } else {
+            const bool sticky = significand << (srcBits - shift);
+            src_rep_t denormalizedSignificand = significand >> shift | sticky;
+            absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
+            const src_rep_t roundBits = denormalizedSignificand & roundMask;
+            // Round to nearest
+            if (roundBits > halfway)
+                absResult++;
+            // Ties to even
+            else if (roundBits == halfway)
+                absResult += absResult & 1;
+        }
+    }
+
+    // Apply the signbit to (dst_t)abs(a).
+    const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
+    return dstFromRep(result);
+}
diff --git a/lib/builtins/truncdfsf2.c b/lib/builtins/truncdfsf2.c
index 61c909a..46ec11d 100644
--- a/lib/builtins/truncdfsf2.c
+++ b/lib/builtins/truncdfsf2.c
@@ -6,163 +6,13 @@
 // Source Licenses. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements a fairly generic conversion from a wider to a narrower
-// IEEE-754 floating-point type in the default (round to nearest, ties to even)
-// rounding mode.  The constants and types defined following the includes below
-// parameterize the conversion.
-//
-// This routine can be trivially adapted to support conversions to 
-// half-precision or from quad-precision. It does not support types that don't
-// use the usual IEEE-754 interchange formats; specifically, some work would be
-// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
-// double-double format.
-//
-// Note please, however, that this implementation is only intended to support
-// *narrowing* operations; if you need to convert to a *wider* floating-point
-// type (e.g. float -> double), then this routine will not do what you want it
-// to.
-//
-// It also requires that integer types at least as large as both formats
-// are available on the target platform; this may pose a problem when trying
-// to add support for quad on some 32-bit systems, for example.
-//
-// Finally, the following assumptions are made:
-//
-// 1. floating-point types and integer types have the same endianness on the
-//    target platform
-//
-// 2. quiet NaNs, if supported, are indicated by the leading bit of the
-//    significand field being set
-//
-//===----------------------------------------------------------------------===//
 
-#include "int_lib.h"
-
-typedef double src_t;
-typedef uint64_t src_rep_t;
-#define SRC_REP_C UINT64_C
-static const int srcSigBits = 52;
-
-typedef float dst_t;
-typedef uint32_t dst_rep_t;
-#define DST_REP_C UINT32_C
-static const int dstSigBits = 23;
-
-// End of specialization parameters.  Two helper routines for conversion to and
-// from the representation of floating-point data as integer values follow.
-
-static inline src_rep_t srcToRep(src_t x) {
-    const union { src_t f; src_rep_t i; } rep = {.f = x};
-    return rep.i;
-}
-
-static inline dst_t dstFromRep(dst_rep_t x) {
-    const union { dst_t f; dst_rep_t i; } rep = {.i = x};
-    return rep.f;
-}
-
-// End helper routines.  Conversion implementation follows.
+#define SRC_DOUBLE
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
 
 ARM_EABI_FNALIAS(d2f, truncdfsf2)
 
-COMPILER_RT_ABI dst_t
-__truncdfsf2(src_t a) {
-    
-    // Various constants whose values follow from the type parameters.
-    // Any reasonable optimizer will fold and propagate all of these.
-    const int srcBits = sizeof(src_t)*CHAR_BIT;
-    const int srcExpBits = srcBits - srcSigBits - 1;
-    const int srcInfExp = (1 << srcExpBits) - 1;
-    const int srcExpBias = srcInfExp >> 1;
-    
-    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
-    const src_rep_t significandMask = srcMinNormal - 1;
-    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
-    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
-    const src_rep_t srcAbsMask = srcSignMask - 1;
-    const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
-    const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
-    
-    const int dstBits = sizeof(dst_t)*CHAR_BIT;
-    const int dstExpBits = dstBits - dstSigBits - 1;
-    const int dstInfExp = (1 << dstExpBits) - 1;
-    const int dstExpBias = dstInfExp >> 1;
-    
-    const int underflowExponent = srcExpBias + 1 - dstExpBias;
-    const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
-    const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
-    const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
-    
-    const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
-    const dst_rep_t dstNaNCode = dstQNaN - 1;
-
-    // Break a into a sign and representation of the absolute value
-    const src_rep_t aRep = srcToRep(a);
-    const src_rep_t aAbs = aRep & srcAbsMask;
-    const src_rep_t sign = aRep & srcSignMask;
-    dst_rep_t absResult;
-    
-    if (aAbs - underflow < aAbs - overflow) {
-        // The exponent of a is within the range of normal numbers in the
-        // destination format.  We can convert by simply right-shifting with
-        // rounding and adjusting the exponent.
-        absResult = aAbs >> (srcSigBits - dstSigBits);
-        absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
-        
-        const src_rep_t roundBits = aAbs & roundMask;
-        
-        // Round to nearest
-        if (roundBits > halfway)
-            absResult++;
-        
-        // Ties to even
-        else if (roundBits == halfway)
-            absResult += absResult & 1;
-    }
-    
-    else if (aAbs > srcInfinity) {
-        // a is NaN.
-        // Conjure the result by beginning with infinity, setting the qNaN
-        // bit and inserting the (truncated) trailing NaN field.
-        absResult = (dst_rep_t)dstInfExp << dstSigBits;
-        absResult |= dstQNaN;
-        absResult |= aAbs & dstNaNCode;
-    }
-    
-    else if (aAbs > overflow) {
-        // a overflows to infinity.
-        absResult = (dst_rep_t)dstInfExp << dstSigBits;
-    }
-    
-    else {
-        // a underflows on conversion to the destination type or is an exact
-        // zero.  The result may be a denormal or zero.  Extract the exponent
-        // to get the shift amount for the denormalization.
-        const int aExp = aAbs >> srcSigBits;
-        const int shift = srcExpBias - dstExpBias - aExp + 1;
-        
-        const src_rep_t significand = (aRep & significandMask) | srcMinNormal;
-        
-        // Right shift by the denormalization amount with sticky.
-        if (shift > srcSigBits) {
-            absResult = 0;
-        } else {
-            const bool sticky = significand << (srcBits - shift);
-            src_rep_t denormalizedSignificand = significand >> shift | sticky;
-            absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
-            const src_rep_t roundBits = denormalizedSignificand & roundMask;
-            // Round to nearest
-            if (roundBits > halfway)
-                absResult++;
-            // Ties to even
-            else if (roundBits == halfway)
-                absResult += absResult & 1;
-        }
-    }
-    
-    // Apply the signbit to (dst_t)abs(a).
-    const dst_rep_t result = absResult | sign >> (srcBits - dstBits);
-    return dstFromRep(result);
-    
+COMPILER_RT_ABI float __truncdfsf2(double a) {
+    return __truncXfYf2__(a);  // specialized by SRC_DOUBLE / DST_SINGLE above
 }
diff --git a/lib/builtins/trunctfdf2.c b/lib/builtins/trunctfdf2.c
new file mode 100644
index 0000000..741a71b
--- /dev/null
+++ b/lib/builtins/trunctfdf2.c
@@ -0,0 +1,22 @@
+//===-- lib/trunctfdf2.c - quad -> double conversion --------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_QUAD
+#define DST_DOUBLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI double __trunctfdf2(long double a) {
+    return __truncXfYf2__(a);  // specialized by SRC_QUAD / DST_DOUBLE above
+}
+
+#endif
diff --git a/lib/builtins/trunctfsf2.c b/lib/builtins/trunctfsf2.c
new file mode 100644
index 0000000..de96c1d
--- /dev/null
+++ b/lib/builtins/trunctfsf2.c
@@ -0,0 +1,22 @@
+//===-- lib/trunctfsf2.c - quad -> single conversion --------------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT)
+#define SRC_QUAD
+#define DST_SINGLE
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI float __trunctfsf2(long double a) {
+    return __truncXfYf2__(a);  // specialized by SRC_QUAD / DST_SINGLE above
+}
+
+#endif
diff --git a/lib/builtins/x86_64/floatundidf.S b/lib/builtins/x86_64/floatundidf.S
index 28babfd..7c94231 100644
--- a/lib/builtins/x86_64/floatundidf.S
+++ b/lib/builtins/x86_64/floatundidf.S
@@ -17,14 +17,20 @@
 
 #ifdef __x86_64__
 
-#ifndef __ELF__
-.const
+#if defined(__APPLE__)
+	.const
+#elif defined(__ELF__)
+	.section .rodata	/* spelled out: bare .rodata is not portable */
+#else
+	.section .rdata,"rd"
 #endif
-.balign 4
-twop52: .quad 0x4330000000000000
+	.balign 4
+twop52:
+	.quad 0x4330000000000000
 twop84_plus_twop52:
-		.quad 0x4530000000100000
-twop84: .quad 0x4530000000000000
+	.quad 0x4530000000100000
+twop84:
+	.quad 0x4530000000000000
 
 #define REL_ADDR(_a)	(_a)(%rip)
 
diff --git a/lib/builtins/x86_64/floatundisf.S b/lib/builtins/x86_64/floatundisf.S
index b5ca4f3..c840913 100644
--- a/lib/builtins/x86_64/floatundisf.S
+++ b/lib/builtins/x86_64/floatundisf.S
@@ -7,10 +7,15 @@
 
 #ifdef __x86_64__
 
-#ifndef __ELF__
-.literal4
+#if defined(__APPLE__)
+	.literal4
+#elif defined(__ELF__)
+	.section .rodata	/* spelled out: bare .rodata is not portable */
+#else
+	.section .rdata,"rd"
 #endif
-two: .single 2.0
+two:
+	.single 2.0
 
 #define REL_ADDR(_a)	(_a)(%rip)
 
diff --git a/lib/builtins/x86_64/floatundixf.S b/lib/builtins/x86_64/floatundixf.S
index 36b837c..6603935 100644
--- a/lib/builtins/x86_64/floatundixf.S
+++ b/lib/builtins/x86_64/floatundixf.S
@@ -7,16 +7,22 @@
 
 #ifdef __x86_64__
 
-#ifndef __ELF__
-.const
+#if defined(__APPLE__)
+	.const
+#elif defined(__ELF__)
+	.section .rodata	/* spelled out: bare .rodata is not portable */
+#else
+	.section .rdata,"rd"
 #endif
-.balign 4
-twop64: .quad 0x43f0000000000000
+	.balign 4
+twop64:
+	.quad 0x43f0000000000000
 
 #define REL_ADDR(_a)	(_a)(%rip)
 
-.text
-.balign 4
+	.text
+
+	.balign 4
 DEFINE_COMPILERRT_FUNCTION(__floatundixf)
 	movq	%rdi,	 -8(%rsp)
 	fildq	-8(%rsp)
@@ -35,12 +41,20 @@
 
 #ifdef __x86_64__
 
-.const
-.balign 4
-twop52: .quad 0x4330000000000000
+#if defined(__APPLE__)
+	.const
+#elif defined(__ELF__)
+	.section .rodata
+#else
+	.section .rdata,"rd"
+#endif
+	.balign 4
+twop52:
+	.quad 0x4330000000000000
 twop84_plus_twop52_neg:
-		.quad 0xc530000000100000
-twop84: .quad 0x4530000000000000
+	.quad 0xc530000000100000
+twop84:
+	.quad 0x4530000000000000
 
 #define REL_ADDR(_a)	(_a)(%rip)
 
diff --git a/lib/dfsan/dfsan.h b/lib/dfsan/dfsan.h
index 92a1357..ffa98d8 100644
--- a/lib/dfsan/dfsan.h
+++ b/lib/dfsan/dfsan.h
@@ -28,6 +28,7 @@
 };
 
 extern "C" {
+void dfsan_add_label(dfsan_label label, void *addr, uptr size);
 void dfsan_set_label(dfsan_label label, void *addr, uptr size);
 dfsan_label dfsan_read_label(const void *addr, uptr size);
 dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2);
diff --git a/lib/interception/interception.h b/lib/interception/interception.h
index ac982f7..743c88d 100644
--- a/lib/interception/interception.h
+++ b/lib/interception/interception.h
@@ -135,6 +135,16 @@
     extern "C" ret_type func(__VA_ARGS__);
 # define DECLARE_WRAPPER_WINAPI(ret_type, func, ...) \
     extern "C" __declspec(dllimport) ret_type __stdcall func(__VA_ARGS__);
+#elif defined(__FreeBSD__)
+# define WRAP(x) __interceptor_ ## x
+# define WRAPPER_NAME(x) "__interceptor_" #x
+# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default")))
+// FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher
+// priority than weak ones so weak aliases won't work for indirect calls
+// in position-independent (-fPIC / -fPIE) mode.
+# define DECLARE_WRAPPER(ret_type, func, ...) \
+     extern "C" ret_type func(__VA_ARGS__) \
+     __attribute__((alias("__interceptor_" #func), visibility("default")));
 #else
 # define WRAP(x) __interceptor_ ## x
 # define WRAPPER_NAME(x) "__interceptor_" #x
diff --git a/lib/interception/interception_win.cc b/lib/interception/interception_win.cc
index 332fc71..9eabe52 100644
--- a/lib/interception/interception_win.cc
+++ b/lib/interception/interception_win.cc
@@ -121,6 +121,7 @@
     }
     switch (0x00FFFFFF & *(unsigned int*)(code + cursor)) {
       case 0x24448A:  // 8A 44 24 XX = mov eal, dword ptr [esp+XXh]
+      case 0x24448B:  // 8B 44 24 XX = mov eax, dword ptr [esp+XXh]
       case 0x244C8B:  // 8B 4C 24 XX = mov ecx, dword ptr [esp+XXh]
       case 0x24548B:  // 8B 54 24 XX = mov edx, dword ptr [esp+XXh]
       case 0x24748B:  // 8B 74 24 XX = mov esi, dword ptr [esp+XXh]
@@ -133,8 +134,9 @@
     // FIXME: Unknown instruction failures might happen when we add a new
     // interceptor or a new compiler version. In either case, they should result
     // in visible and readable error messages. However, merely calling abort()
-    // or __debugbreak() leads to an infinite recursion in CheckFailed.
+    // leads to an infinite recursion in CheckFailed.
     // Do we have a good way to abort with an error message here?
+    __debugbreak();
     return 0;
   }
 
diff --git a/lib/lsan/CMakeLists.txt b/lib/lsan/CMakeLists.txt
index 0924282..82e9aa7 100644
--- a/lib/lsan/CMakeLists.txt
+++ b/lib/lsan/CMakeLists.txt
@@ -19,7 +19,7 @@
 # The common files need to build on every arch supported by ASan.
 # (Even if they build into dummy object files.)
 filter_available_targets(LSAN_COMMON_SUPPORTED_ARCH
-  x86_64 i386 powerpc64 arm)
+  x86_64 i386 powerpc64 arm aarch64 mips)
 
 add_custom_target(lsan)
 
diff --git a/lib/lsan/lsan_common.cc b/lib/lsan/lsan_common.cc
index 09ecac2..92da6b0 100644
--- a/lib/lsan/lsan_common.cc
+++ b/lib/lsan/lsan_common.cc
@@ -131,9 +131,9 @@
   }
 }
 
-class Decorator: private __sanitizer::AnsiColorDecorator {
+class Decorator: public __sanitizer::SanitizerCommonDecorator {
  public:
-  Decorator() : __sanitizer::AnsiColorDecorator(PrintsToTtyCached()) { }
+  Decorator() : SanitizerCommonDecorator() { }
   const char *Error() { return Red(); }
   const char *Leak() { return Blue(); }
   const char *End() { return Default(); }
diff --git a/lib/lsan/lsan_interceptors.cc b/lib/lsan/lsan_interceptors.cc
index 38dc62e..ad8ca90 100644
--- a/lib/lsan/lsan_interceptors.cc
+++ b/lib/lsan/lsan_interceptors.cc
@@ -105,6 +105,12 @@
   return Allocate(stack, size, alignment, kAlwaysClearMemory);
 }
 
+INTERCEPTOR(void*, aligned_alloc, uptr alignment, uptr size) {
+  ENSURE_LSAN_INITED;
+  GET_STACK_TRACE;
+  return Allocate(stack, size, alignment, kAlwaysClearMemory);
+}
+
 INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
   ENSURE_LSAN_INITED;
   GET_STACK_TRACE;
diff --git a/lib/msan/msan.cc b/lib/msan/msan.cc
index 5ee92bf..fd7fdbb 100644
--- a/lib/msan/msan.cc
+++ b/lib/msan/msan.cc
@@ -144,6 +144,9 @@
   ParseFlag(str, &f->report_umrs, "report_umrs", "");
   ParseFlag(str, &f->wrap_signals, "wrap_signals", "");
   ParseFlag(str, &f->print_stats, "print_stats", "");
+  ParseFlag(str, &f->atexit, "atexit", "");
+  ParseFlag(str, &f->store_context_size, "store_context_size", "");
+  if (f->store_context_size < 1) f->store_context_size = 1;
 
   // keep_going is an old name for halt_on_error,
   // and it has inverse meaning.
@@ -161,6 +164,7 @@
   cf->handle_ioctl = true;
   // FIXME: test and enable.
   cf->check_printf = false;
+  cf->intercept_tls_get_addr = true;
 
   internal_memset(f, 0, sizeof(*f));
   f->poison_heap_with_zeroes = false;
@@ -173,7 +177,9 @@
   f->report_umrs = true;
   f->wrap_signals = true;
   f->print_stats = false;
+  f->atexit = false;
   f->halt_on_error = !&__msan_keep_going;
+  f->store_context_size = 20;
 
   // Override from user-specified string.
   if (__msan_default_options)
@@ -197,10 +203,6 @@
   PrintWarningWithOrigin(pc, bp, __msan_origin_tls);
 }
 
-bool OriginIsValid(u32 origin) {
-  return origin != 0 && origin != (u32)-1;
-}
-
 void PrintWarningWithOrigin(uptr pc, uptr bp, u32 origin) {
   if (msan_expect_umr) {
     // Printf("Expected UMR\n");
@@ -214,10 +216,10 @@
   GET_FATAL_STACK_TRACE_PC_BP(pc, bp);
 
   u32 report_origin =
-    (__msan_get_track_origins() && OriginIsValid(origin)) ? origin : 0;
+    (__msan_get_track_origins() && Origin(origin).isValid()) ? origin : 0;
   ReportUMR(&stack, report_origin);
 
-  if (__msan_get_track_origins() && !OriginIsValid(origin)) {
+  if (__msan_get_track_origins() && !Origin(origin).isValid()) {
     Printf(
         "  ORIGIN: invalid (%x). Might be a bug in MemorySanitizer origin "
         "tracking.\n    This could still be a bug in your code, too!\n",
@@ -318,7 +320,15 @@
 
 #define MSAN_MAYBE_STORE_ORIGIN(type, size)                       \
   void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \
-    if (UNLIKELY(s)) *(u32 *)MEM_TO_ORIGIN((uptr)p &~3UL) = o;    \
+    if (UNLIKELY(s)) {                                            \
+      if (__msan_get_track_origins() > 1) {                       \
+        GET_CALLER_PC_BP_SP;                                      \
+        (void) sp;                                                \
+        GET_STORE_STACK_TRACE_PC_BP(pc, bp);                      \
+        o = ChainOrigin(o, &stack);                               \
+      }                                                           \
+      *(u32 *)MEM_TO_ORIGIN((uptr)p & ~3UL) = o;                  \
+    }                                                             \
   }
 
 MSAN_MAYBE_STORE_ORIGIN(u8, 1)
@@ -444,7 +454,11 @@
 
   unsigned char *s = (unsigned char*)MEM_TO_SHADOW(x);
   for (uptr i = 0; i < size; i++) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    Printf("%x%x ", s[i] & 0xf, s[i] >> 4);
+#else
     Printf("%x%x ", s[i] >> 4, s[i] & 0xf);
+#endif
   }
   Printf("\n");
 }
@@ -571,7 +585,7 @@
   }
   if (print)
     Printf("__msan_set_alloca_origin: descr=%s id=%x\n", descr + 4, id);
-  __msan_set_origin(a, size, id);
+  __msan_set_origin(a, size, Origin(id, 1).raw_id());
 }
 
 u32 __msan_chain_origin(u32 id) {
diff --git a/lib/msan/msan.h b/lib/msan/msan.h
index 2105791..05a8c47 100644
--- a/lib/msan/msan.h
+++ b/lib/msan/msan.h
@@ -104,12 +104,12 @@
         StackTrace::GetCurrentPc(), GET_CURRENT_FRAME(),           \
         common_flags()->fast_unwind_on_malloc)
 
-#define GET_STORE_STACK_TRACE_PC_BP(pc, bp)                            \
-  StackTrace stack;                                                    \
-  stack.size = 0;                                                      \
-  if (__msan_get_track_origins() > 1 && msan_inited)                   \
-    GetStackTrace(&stack, common_flags()->malloc_context_size, pc, bp, \
-                  common_flags()->fast_unwind_on_malloc)
+#define GET_STORE_STACK_TRACE_PC_BP(pc, bp)                    \
+  StackTrace stack;                                            \
+  stack.size = 0;                                              \
+  if (__msan_get_track_origins() > 1 && msan_inited)           \
+    GetStackTrace(&stack, flags()->store_context_size, pc, bp, \
+                  common_flags()->fast_unwind_on_malloc)
 
 #define GET_FATAL_STACK_TRACE_PC_BP(pc, bp)       \
   StackTrace stack;                               \
@@ -141,8 +141,16 @@
 }  // namespace __msan
 
+// Each hook macro expands to exactly one statement, so it stays correct under
+// an unbraced if/else at the call site.
 #define MSAN_MALLOC_HOOK(ptr, size) \
-  if (&__msan_malloc_hook) __msan_malloc_hook(ptr, size)
+  do {                                                                \
+    if (&__msan_malloc_hook) __msan_malloc_hook(ptr, size);           \
+    if (&__sanitizer_malloc_hook) __sanitizer_malloc_hook(ptr, size); \
+  } while (0)
 #define MSAN_FREE_HOOK(ptr) \
-  if (&__msan_free_hook) __msan_free_hook(ptr)
+  do {                                                      \
+    if (&__msan_free_hook) __msan_free_hook(ptr);           \
+    if (&__sanitizer_free_hook) __sanitizer_free_hook(ptr); \
+  } while (0)
 
 #endif  // MSAN_H
diff --git a/lib/msan/msan_allocator.cc b/lib/msan/msan_allocator.cc
index e6da9c1..fb1788f 100644
--- a/lib/msan/msan_allocator.cc
+++ b/lib/msan/msan_allocator.cc
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "msan.h"
 #include "msan_allocator.h"
@@ -116,7 +117,7 @@
   CHECK(p);
   Init();
   MSAN_FREE_HOOK(p);
-  Metadata *meta = reinterpret_cast<Metadata*>(allocator.GetMetaData(p));
+  Metadata *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p));
   uptr size = meta->requested_size;
   meta->requested_size = 0;
   // This memory will not be reused by anyone else, so we are free to keep it
@@ -128,7 +129,7 @@
       CHECK(stack_id);
       u32 id;
       ChainedOriginDepotPut(stack_id, Origin::kHeapRoot, &id);
-      __msan_set_origin(p, size,  Origin(id, 1).raw_id());
+      __msan_set_origin(p, size, Origin(id, 1).raw_id());
     }
   }
   MsanThread *t = GetCurrentThread();
@@ -171,12 +172,10 @@
 }
 
 static uptr AllocationSize(const void *p) {
-  if (p == 0)
-    return 0;
+  if (p == 0) return 0;
   const void *beg = allocator.GetBlockBegin(p);
-  if (beg != p)
-    return 0;
-  Metadata *b = (Metadata*)allocator.GetMetaData(p);
+  if (beg != p) return 0;
+  Metadata *b = (Metadata *)allocator.GetMetaData(p);
   return b->requested_size;
 }
 
@@ -184,38 +183,45 @@
 
 using namespace __msan;
 
+uptr __sanitizer_get_current_allocated_bytes() {
+  uptr stats[AllocatorStatCount];
+  allocator.GetStats(stats);
+  return stats[AllocatorStatAllocated];
+}
 uptr __msan_get_current_allocated_bytes() {
-  u64 stats[AllocatorStatCount];
-  allocator.GetStats(stats);
-  u64 m = stats[AllocatorStatMalloced];
-  u64 f = stats[AllocatorStatFreed];
-  return m >= f ? m - f : 1;
+  return __sanitizer_get_current_allocated_bytes();
 }
 
+uptr __sanitizer_get_heap_size() {
+  uptr stats[AllocatorStatCount];
+  allocator.GetStats(stats);
+  return stats[AllocatorStatMapped];
+}
 uptr __msan_get_heap_size() {
-  u64 stats[AllocatorStatCount];
-  allocator.GetStats(stats);
-  u64 m = stats[AllocatorStatMmapped];
-  u64 f = stats[AllocatorStatUnmapped];
-  return m >= f ? m - f : 1;
+  return __sanitizer_get_heap_size();
 }
 
+uptr __sanitizer_get_free_bytes() { return 1; }
 uptr __msan_get_free_bytes() {
-  return 1;
+  return __sanitizer_get_free_bytes();
 }
 
+uptr __sanitizer_get_unmapped_bytes() { return 1; }
 uptr __msan_get_unmapped_bytes() {
-  return 1;
+  return __sanitizer_get_unmapped_bytes();
 }
 
+uptr __sanitizer_get_estimated_allocated_size(uptr size) { return size; }
 uptr __msan_get_estimated_allocated_size(uptr size) {
-  return size;
+  return __sanitizer_get_estimated_allocated_size(size);
 }
 
+int __sanitizer_get_ownership(const void *p) { return AllocationSize(p) != 0; }
 int __msan_get_ownership(const void *p) {
-  return AllocationSize(p) != 0;
+  return __sanitizer_get_ownership(p);
 }
 
+uptr __sanitizer_get_allocated_size(const void *p) { return AllocationSize(p); }
 uptr __msan_get_allocated_size(const void *p) {
-  return AllocationSize(p);
+  return __sanitizer_get_allocated_size(p);
 }
diff --git a/lib/msan/msan_chained_origin_depot.cc b/lib/msan/msan_chained_origin_depot.cc
index a98bcf5..faf0461 100644
--- a/lib/msan/msan_chained_origin_depot.cc
+++ b/lib/msan/msan_chained_origin_depot.cc
@@ -19,7 +19,30 @@
 struct ChainedOriginDepotDesc {
   u32 here_id;
   u32 prev_id;
-  u32 hash() const { return here_id ^ prev_id; }
+  u32 hash() const {
+    const u32 m = 0x5bd1e995;
+    const u32 seed = 0x9747b28c;
+    const u32 r = 24;
+    u32 h = seed;
+    u32 k = here_id;
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+    h *= m;
+    h ^= k;
+
+    k = prev_id;
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+    h *= m;
+    h ^= k;
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+    return h;
+  }
   bool is_valid() { return true; }
 };
 
@@ -58,7 +81,8 @@
   typedef Handle handle_type;
 };
 
-static StackDepotBase<ChainedOriginDepotNode, 3> chainedOriginDepot;
+// kTabSizeLog = 20 => 8Mb static storage for bucket pointers.
+static StackDepotBase<ChainedOriginDepotNode, 3, 20> chainedOriginDepot;
 
 StackDepotStats *ChainedOriginDepotGetStats() {
   return chainedOriginDepot.GetStats();
diff --git a/lib/msan/msan_flags.h b/lib/msan/msan_flags.h
index 47ac7e4..9b93f11 100644
--- a/lib/msan/msan_flags.h
+++ b/lib/msan/msan_flags.h
@@ -29,6 +29,8 @@
   bool wrap_signals;
   bool print_stats;
   bool halt_on_error;
+  bool atexit;
+  int store_context_size; // like malloc_context_size, but for uninit stores
 };
 
 Flags *flags();
diff --git a/lib/msan/msan_interceptors.cc b/lib/msan/msan_interceptors.cc
index e260ec3..3394690 100644
--- a/lib/msan/msan_interceptors.cc
+++ b/lib/msan/msan_interceptors.cc
@@ -21,6 +21,7 @@
 #include "msan_thread.h"
 #include "sanitizer_common/sanitizer_platform_limits_posix.h"
 #include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
@@ -28,6 +29,7 @@
 #include "sanitizer_common/sanitizer_stackdepot.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
 
 #include <stdarg.h>
 // ACHTUNG! No other system header includes in this file.
@@ -161,8 +163,20 @@
   return ptr;
 }
 
-INTERCEPTOR(void *, __libc_memalign, uptr align, uptr s)
-    ALIAS(WRAPPER_NAME(memalign));
+INTERCEPTOR(void *, aligned_alloc, SIZE_T boundary, SIZE_T size) {
+  GET_MALLOC_STACK_TRACE;
+  CHECK_EQ(boundary & (boundary - 1), 0);
+  void *ptr = MsanReallocate(&stack, 0, size, boundary, false);
+  return ptr;
+}
+
+INTERCEPTOR(void *, __libc_memalign, SIZE_T boundary, SIZE_T size) {
+  GET_MALLOC_STACK_TRACE;
+  CHECK_EQ(boundary & (boundary - 1), 0);  // alignment must be a power of two
+  void *ptr = MsanReallocate(&stack, 0, size, boundary, false);
+  DTLS_on_libc_memalign(ptr, size);  // the block is `size` bytes long
+  return ptr;
+}
 
 INTERCEPTOR(void *, valloc, SIZE_T size) {
   GET_MALLOC_STACK_TRACE;
@@ -195,7 +209,7 @@
 }
 
 INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
-  return __msan_get_allocated_size(ptr);
+  return __sanitizer_get_allocated_size(ptr);
 }
 
 // This function actually returns a struct by value, but we can't unpoison a
@@ -362,23 +376,34 @@
     INTERCEPTOR_STRTO_BODY(ret_type, func, nptr, endptr, base, loc);     \
   }
 
-INTERCEPTOR_STRTO(double, strtod)                           // NOLINT
-INTERCEPTOR_STRTO(float, strtof)                            // NOLINT
-INTERCEPTOR_STRTO(long double, strtold)                     // NOLINT
-INTERCEPTOR_STRTO_BASE(long, strtol)                        // NOLINT
-INTERCEPTOR_STRTO_BASE(long long, strtoll)                  // NOLINT
-INTERCEPTOR_STRTO_BASE(unsigned long, strtoul)              // NOLINT
-INTERCEPTOR_STRTO_BASE(unsigned long long, strtoull)        // NOLINT
-INTERCEPTOR_STRTO_LOC(double, strtod_l)                     // NOLINT
-INTERCEPTOR_STRTO_LOC(double, __strtod_l)                   // NOLINT
-INTERCEPTOR_STRTO_LOC(float, strtof_l)                      // NOLINT
-INTERCEPTOR_STRTO_LOC(float, __strtof_l)                    // NOLINT
-INTERCEPTOR_STRTO_LOC(long double, strtold_l)               // NOLINT
-INTERCEPTOR_STRTO_LOC(long double, __strtold_l)             // NOLINT
-INTERCEPTOR_STRTO_BASE_LOC(long, strtol_l)                  // NOLINT
-INTERCEPTOR_STRTO_BASE_LOC(long long, strtoll_l)            // NOLINT
-INTERCEPTOR_STRTO_BASE_LOC(unsigned long, strtoul_l)        // NOLINT
-INTERCEPTOR_STRTO_BASE_LOC(unsigned long long, strtoull_l)  // NOLINT
+INTERCEPTOR_STRTO(double, strtod)                                    // NOLINT
+INTERCEPTOR_STRTO(float, strtof)                                     // NOLINT
+INTERCEPTOR_STRTO(long double, strtold)                              // NOLINT
+INTERCEPTOR_STRTO_BASE(long, strtol)                                 // NOLINT
+INTERCEPTOR_STRTO_BASE(long long, strtoll)                           // NOLINT
+INTERCEPTOR_STRTO_BASE(unsigned long, strtoul)                       // NOLINT
+INTERCEPTOR_STRTO_BASE(unsigned long long, strtoull)                 // NOLINT
+INTERCEPTOR_STRTO_LOC(double, strtod_l)                              // NOLINT
+INTERCEPTOR_STRTO_LOC(double, __strtod_l)                            // NOLINT
+INTERCEPTOR_STRTO_LOC(double, __strtod_internal)                     // NOLINT
+INTERCEPTOR_STRTO_LOC(float, strtof_l)                               // NOLINT
+INTERCEPTOR_STRTO_LOC(float, __strtof_l)                             // NOLINT
+INTERCEPTOR_STRTO_LOC(float, __strtof_internal)                      // NOLINT
+INTERCEPTOR_STRTO_LOC(long double, strtold_l)                        // NOLINT
+INTERCEPTOR_STRTO_LOC(long double, __strtold_l)                      // NOLINT
+INTERCEPTOR_STRTO_LOC(long double, __strtold_internal)               // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(long, strtol_l)                           // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(long, __strtol_l)                         // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(long, __strtol_internal)                  // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(long long, strtoll_l)                     // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(long long, __strtoll_l)                   // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(long long, __strtoll_internal)            // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(unsigned long, strtoul_l)                 // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(unsigned long, __strtoul_l)               // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(unsigned long, __strtoul_internal)        // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(unsigned long long, strtoull_l)           // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(unsigned long long, __strtoull_l)         // NOLINT
+INTERCEPTOR_STRTO_BASE_LOC(unsigned long long, __strtoull_internal)  // NOLINT
 
 // FIXME: support *wprintf in common format interceptors.
 INTERCEPTOR(int, vswprintf, void *str, uptr size, void *format, va_list ap) {
@@ -1239,13 +1264,14 @@
   // hack until we have a really fast internal_memset
   if (sizeof(uptr) == 8 &&
       (n % 8) == 0 &&
-      ((uptr)ptr % 8) == 0 &&
-      (c == 0 || c == -1)) {
-    // Printf("memset %p %zd %x\n", ptr, n, c);
-    uptr to_store = c ? -1L : 0L;
+      ((uptr)ptr % 8) == 0) {
+    uptr c8 = (unsigned)c & 0xFF;
+    c8 = (c8 << 8) | c8;
+    c8 = (c8 << 16) | c8;
+    c8 = (c8 << 32) | c8;
     uptr *p = (uptr*)ptr;
     for (SIZE_T i = 0; i < n / 8; i++)
-      p[i] = to_store;
+      p[i] = c8;
     return ptr;
   }
   return internal_memset(ptr, c, n);
@@ -1378,7 +1404,7 @@
   uptr beg = d & ~3UL;
   // Copy left unaligned origin if that memory is poisoned.
   if (beg < d) {
-    u32 o = GetOriginIfPoisoned(beg, d - beg);
+    u32 o = GetOriginIfPoisoned((uptr)src, d - beg);
     if (o) {
       if (__msan_get_track_origins() > 1) o = ChainOrigin(o, stack);
       *(u32 *)MEM_TO_ORIGIN(beg) = o;
@@ -1386,15 +1412,17 @@
     beg += 4;
   }
 
-  uptr end = (d + size + 3) & ~3UL;
+  uptr end = (d + size) & ~3UL;
+  // If both ends fall into the same 4-byte slot, we are done.
+  if (end < beg) return;
+
   // Copy right unaligned origin if that memory is poisoned.
-  if (end > d + size) {
-    u32 o = GetOriginIfPoisoned(d + size, end - d - size);
+  if (end < d + size) {
+    u32 o = GetOriginIfPoisoned((uptr)src + (end - d), (d + size) - end);
     if (o) {
       if (__msan_get_track_origins() > 1) o = ChainOrigin(o, stack);
-      *(u32 *)MEM_TO_ORIGIN(end - 4) = o;
+      *(u32 *)MEM_TO_ORIGIN(end) = o;
     }
-    end -= 4;
   }
 
   if (beg < end) {
@@ -1404,7 +1432,7 @@
     if (__msan_get_track_origins() > 1) {
       u32 *src = (u32 *)MEM_TO_ORIGIN(s);
       u32 *src_s = (u32 *)MEM_TO_SHADOW(s);
-      u32 *src_end = src + (end - beg);
+      u32 *src_end = (u32 *)MEM_TO_ORIGIN(s + (end - beg));
       u32 *dst = (u32 *)MEM_TO_ORIGIN(beg);
       u32 src_o = 0;
       u32 dst_o = 0;
@@ -1449,6 +1477,7 @@
   INTERCEPT_FUNCTION(mmap64);
   INTERCEPT_FUNCTION(posix_memalign);
   INTERCEPT_FUNCTION(memalign);
+  INTERCEPT_FUNCTION(__libc_memalign);
   INTERCEPT_FUNCTION(valloc);
   INTERCEPT_FUNCTION(pvalloc);
   INTERCEPT_FUNCTION(malloc);
@@ -1485,23 +1514,34 @@
   INTERCEPT_FUNCTION(gcvt);
   INTERCEPT_FUNCTION(strcat);  // NOLINT
   INTERCEPT_FUNCTION(strncat);  // NOLINT
+  INTERCEPT_FUNCTION(strtod);
+  INTERCEPT_FUNCTION(strtof);
+  INTERCEPT_FUNCTION(strtold);
   INTERCEPT_FUNCTION(strtol);
   INTERCEPT_FUNCTION(strtoll);
   INTERCEPT_FUNCTION(strtoul);
   INTERCEPT_FUNCTION(strtoull);
-  INTERCEPT_FUNCTION(strtod);
   INTERCEPT_FUNCTION(strtod_l);
   INTERCEPT_FUNCTION(__strtod_l);
-  INTERCEPT_FUNCTION(strtof);
+  INTERCEPT_FUNCTION(__strtod_internal);
   INTERCEPT_FUNCTION(strtof_l);
   INTERCEPT_FUNCTION(__strtof_l);
-  INTERCEPT_FUNCTION(strtold);
+  INTERCEPT_FUNCTION(__strtof_internal);
   INTERCEPT_FUNCTION(strtold_l);
   INTERCEPT_FUNCTION(__strtold_l);
+  INTERCEPT_FUNCTION(__strtold_internal);
   INTERCEPT_FUNCTION(strtol_l);
+  INTERCEPT_FUNCTION(__strtol_l);
+  INTERCEPT_FUNCTION(__strtol_internal);
   INTERCEPT_FUNCTION(strtoll_l);
+  INTERCEPT_FUNCTION(__strtoll_l);
+  INTERCEPT_FUNCTION(__strtoll_internal);
   INTERCEPT_FUNCTION(strtoul_l);
+  INTERCEPT_FUNCTION(__strtoul_l);
+  INTERCEPT_FUNCTION(__strtoul_internal);
   INTERCEPT_FUNCTION(strtoull_l);
+  INTERCEPT_FUNCTION(__strtoull_l);
+  INTERCEPT_FUNCTION(__strtoull_internal);
   INTERCEPT_FUNCTION(vswprintf);
   INTERCEPT_FUNCTION(swprintf);
   INTERCEPT_FUNCTION(strxfrm);
diff --git a/lib/msan/msan_interface_internal.h b/lib/msan/msan_interface_internal.h
index c005c2a..47b47dc 100644
--- a/lib/msan/msan_interface_internal.h
+++ b/lib/msan/msan_interface_internal.h
@@ -161,32 +161,27 @@
 SANITIZER_INTERFACE_ATTRIBUTE
 void __sanitizer_unaligned_store64(uu64 *p, u64 x);
 
+// ---------------------------
+// FIXME: Replace these functions with __sanitizer equivalent.
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr __msan_get_estimated_allocated_size(uptr size);
-
 SANITIZER_INTERFACE_ATTRIBUTE
 int __msan_get_ownership(const void *p);
-
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr __msan_get_allocated_size(const void *p);
-
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr __msan_get_current_allocated_bytes();
-
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr __msan_get_heap_size();
-
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr __msan_get_free_bytes();
-
 SANITIZER_INTERFACE_ATTRIBUTE
 uptr __msan_get_unmapped_bytes();
-
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 /* OPTIONAL */ void __msan_malloc_hook(void *ptr, uptr size);
-
 SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
 /* OPTIONAL */ void __msan_free_hook(void *ptr);
+// ---------------------------
 
 SANITIZER_INTERFACE_ATTRIBUTE
 void __msan_dr_is_initialized();
diff --git a/lib/msan/msan_linux.cc b/lib/msan/msan_linux.cc
index 72f7c59..a8fbabb 100644
--- a/lib/msan/msan_linux.cc
+++ b/lib/msan/msan_linux.cc
@@ -90,12 +90,11 @@
 }
 
 static void MsanAtExit(void) {
+  if (flags()->print_stats && (flags()->atexit || msan_report_count > 0))
+    ReportStats();
   if (msan_report_count > 0) {
     ReportAtExitStatistics();
-    if (flags()->print_stats)
-      ReportStats();
-    if (flags()->exit_code)
-      _exit(flags()->exit_code);
+    if (flags()->exit_code) _exit(flags()->exit_code);
   }
 }
 
diff --git a/lib/msan/msan_origin.h b/lib/msan/msan_origin.h
index 64acf1e..a415650 100644
--- a/lib/msan/msan_origin.h
+++ b/lib/msan/msan_origin.h
@@ -66,6 +66,7 @@
   u32 raw_id() const { return raw_id_; }
   bool isStackRoot() const { return raw_id_ == kStackRoot; }
   bool isHeapRoot() const { return raw_id_ == kHeapRoot; }
+  bool isValid() const { return raw_id_ != 0 && raw_id_ != (u32)-1; }
 
  private:
   u32 raw_id_;
diff --git a/lib/msan/msan_report.cc b/lib/msan/msan_report.cc
index ee8c2b2..85e61e2 100644
--- a/lib/msan/msan_report.cc
+++ b/lib/msan/msan_report.cc
@@ -27,9 +27,9 @@
 
 namespace __msan {
 
-class Decorator: private __sanitizer::AnsiColorDecorator {
+class Decorator: public __sanitizer::SanitizerCommonDecorator {
  public:
-  Decorator() : __sanitizer::AnsiColorDecorator(PrintsToTtyCached()) { }
+  Decorator() : SanitizerCommonDecorator() { }
   const char *Warning()    { return Red(); }
   const char *Origin()     { return Magenta(); }
   const char *Name()   { return Green(); }
@@ -63,6 +63,10 @@
   Decorator d;
   while (true) {
     Origin o(id);
+    if (!o.isValid()) {  // stop walking the chain; id is u32, hence %u
+      Printf("  %sinvalid origin id(%u)%s\n", d.Warning(), id, d.End());
+      break;
+    }
     u32 prev_id;
     u32 stack_id = ChainedOriginDepotGet(o.id(), &prev_id);
     Origin prev_o(prev_id);
@@ -86,7 +90,7 @@
       // FIXME: copied? modified? passed through? observed?
       Printf("  %sUninitialized value was stored to memory at%s\n", d.Origin(),
              d.End());
-      StackTrace::PrintStack(trace, size - 1);
+      StackTrace::PrintStack(trace, size);
       id = prev_id;
     }
   }
@@ -217,7 +221,11 @@
     } else {
       unsigned char v = *(unsigned char *)s;
       if (v) last_quad_poisoned = true;
-      Printf("%02x", v);
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+      Printf("%x%x", v & 0xf, v >> 4);
+#else
+      Printf("%x%x", v >> 4, v & 0xf);
+#endif
     }
     // Group end.
     if (pos % 4 == 3 && with_origins) {
diff --git a/lib/msan/msan_thread.cc b/lib/msan/msan_thread.cc
index 2289be3..5fe99f6 100644
--- a/lib/msan/msan_thread.cc
+++ b/lib/msan/msan_thread.cc
@@ -3,6 +3,8 @@
 #include "msan_thread.h"
 #include "msan_interface_internal.h"
 
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
+
 namespace __msan {
 
 MsanThread *MsanThread::Create(thread_callback_t start_routine,
@@ -33,6 +35,9 @@
   __msan_unpoison((void *)stack_bottom_, stack_top_ - stack_bottom_);
   if (tls_begin_ != tls_end_)
     __msan_unpoison((void *)tls_begin_, tls_end_ - tls_begin_);
+  DTLS *dtls = DTLS_Get();
+  for (uptr i = 0; i < dtls->dtv_size; ++i)
+    __msan_unpoison((void *)(dtls->dtv[i].beg), dtls->dtv[i].size);
 }
 
 void MsanThread::Init() {
@@ -55,6 +60,7 @@
   ClearShadowForThreadStackAndTLS();
   uptr size = RoundUpTo(sizeof(MsanThread), GetPageSizeCached());
   UnmapOrDie(this, size);
+  DTLS_Destroy();
 }
 
 thread_return_t MsanThread::ThreadStart() {
diff --git a/lib/msan/tests/msan_test.cc b/lib/msan/tests/msan_test.cc
index d0b5ce2..f7268d6 100644
--- a/lib/msan/tests/msan_test.cc
+++ b/lib/msan/tests/msan_test.cc
@@ -18,6 +18,7 @@
 
 #include "sanitizer_common/tests/sanitizer_test_utils.h"
 
+#include "sanitizer/allocator_interface.h"
 #include "sanitizer/msan_interface.h"
 #include "msandr_test_so.h"
 
@@ -147,6 +148,12 @@
   return res;
 }
 
+template<typename T>
+T Poisoned(T v = 0, T s = (T)(-1)) {
+  __msan_partial_poison(&v, &s, sizeof(T));
+  return v;
+}
+
 template<class T> NOINLINE T ReturnPoisoned() { return *GetPoisoned<T>(); }
 
 static volatile int g_one = 1;
@@ -1285,33 +1292,56 @@
   EXPECT_POISONED(y[1]);
 }
 
-void TestUnalignedMemcpy(int left, int right, bool src_is_aligned) {
-  const int sz = 20;
+void TestUnalignedMemcpy(unsigned left, unsigned right, bool src_is_aligned,
+                         bool src_is_poisoned, bool dst_is_poisoned) {
+  fprintf(stderr, "%s(%d, %d, %d, %d, %d)\n", __func__, left, right,
+          src_is_aligned, src_is_poisoned, dst_is_poisoned);
+
+  const unsigned sz = 20;
+  U4 dst_origin, src_origin;
   char *dst = (char *)malloc(sz);
-  U4 origin = __msan_get_origin(dst);
+  if (dst_is_poisoned)
+    dst_origin = __msan_get_origin(dst);
+  else
+    memset(dst, 0, sz);
 
   char *src = (char *)malloc(sz);
-  memset(src, 0, sz);
+  if (src_is_poisoned)
+    src_origin = __msan_get_origin(src);
+  else
+    memset(src, 0, sz);
 
   memcpy(dst + left, src_is_aligned ? src + left : src, sz - left - right);
-  for (int i = 0; i < left; ++i)
-    EXPECT_POISONED_O(dst[i], origin);
-  for (int i = 0; i < right; ++i)
-    EXPECT_POISONED_O(dst[sz - i - 1], origin);
-  EXPECT_NOT_POISONED(dst[left]);
-  EXPECT_NOT_POISONED(dst[sz - right - 1]);
+
+  for (unsigned i = 0; i < (left & (~3U)); ++i)
+    if (dst_is_poisoned)
+      EXPECT_POISONED_O(dst[i], dst_origin);
+    else
+      EXPECT_NOT_POISONED(dst[i]);
+
+  for (unsigned i = 0; i < (right & (~3U)); ++i)
+    if (dst_is_poisoned)
+      EXPECT_POISONED_O(dst[sz - i - 1], dst_origin);
+    else
+      EXPECT_NOT_POISONED(dst[sz - i - 1]);
+
+  for (unsigned i = left; i < sz - right; ++i)
+    if (src_is_poisoned)
+      EXPECT_POISONED_O(dst[i], src_origin);
+    else
+      EXPECT_NOT_POISONED(dst[i]);
 
   free(dst);
   free(src);
 }
 
 TEST(MemorySanitizer, memcpy_unaligned) {
-  for (int i = 0; i < 10; ++i) {
-    for (int j = 0; j < 10; ++j) {
-      TestUnalignedMemcpy(i, j, true);
-      TestUnalignedMemcpy(i, j, false);
-    }
-  }
+  for (int i = 0; i < 10; ++i)
+    for (int j = 0; j < 10; ++j)
+      for (int aligned = 0; aligned < 2; ++aligned)
+        for (int srcp = 0; srcp < 2; ++srcp)
+          for (int dstp = 0; dstp < 2; ++dstp)
+            TestUnalignedMemcpy(i, j, aligned, srcp, dstp);
 }
 
 TEST(MemorySanitizer, memmove) {
@@ -1565,15 +1595,13 @@
 TEST_STRTO_INT_LOC(strtoul_l)
 TEST_STRTO_INT_LOC(strtoull_l)
 
-// https://code.google.com/p/memory-sanitizer/issues/detail?id=36
-TEST(MemorySanitizer, DISABLED_strtoimax) {
+TEST(MemorySanitizer, strtoimax) {
   char *e;
   ASSERT_EQ(1, strtoimax("1", &e, 10));
   EXPECT_NOT_POISONED((S8) e);
 }
 
-// https://code.google.com/p/memory-sanitizer/issues/detail?id=36
-TEST(MemorySanitizer, DISABLED_strtoumax) {
+TEST(MemorySanitizer, strtoumax) {
   char *e;
   ASSERT_EQ(1U, strtoumax("1", &e, 10));
   EXPECT_NOT_POISONED((S8) e);
@@ -2987,12 +3015,12 @@
 TEST(MemorySanitizer, pvalloc) {
   void *p = pvalloc(kPageSize + 100);
   EXPECT_EQ(0U, (uintptr_t)p % kPageSize);
-  EXPECT_EQ(2 * kPageSize, __msan_get_allocated_size(p));
+  EXPECT_EQ(2 * kPageSize, __sanitizer_get_allocated_size(p));
   free(p);
 
   p = pvalloc(0);  // pvalloc(0) should allocate at least one page.
   EXPECT_EQ(0U, (uintptr_t)p % kPageSize);
-  EXPECT_EQ(kPageSize, __msan_get_allocated_size(p));
+  EXPECT_EQ(kPageSize, __sanitizer_get_allocated_size(p));
   free(p);
 }
 
@@ -3521,12 +3549,17 @@
   EXPECT_POISONED_O(x[11], originx3);
 }
 
+#if defined(__clang__)
 namespace {
+typedef U1 V16x8 __attribute__((__vector_size__(16)));
 typedef U2 V8x16 __attribute__((__vector_size__(16)));
 typedef U4 V4x32 __attribute__((__vector_size__(16)));
 typedef U8 V2x64 __attribute__((__vector_size__(16)));
 typedef U4 V8x32 __attribute__((__vector_size__(32)));
 typedef U8 V4x64 __attribute__((__vector_size__(32)));
+typedef U4 V2x32 __attribute__((__vector_size__(8)));
+typedef U2 V4x16 __attribute__((__vector_size__(8)));
+typedef U1 V8x8 __attribute__((__vector_size__(8)));
 
 
 V8x16 shift_sse2_left_scalar(V8x16 x, U4 y) {
@@ -3538,20 +3571,19 @@
 }
 
 TEST(VectorShiftTest, sse2_left_scalar) {
-  V8x16 v = {(U2)(*GetPoisoned<U2>() | 3), (U2)(*GetPoisoned<U2>() | 7), 2, 3,
-             4,                            5,                            6, 7};
+  V8x16 v = {Poisoned<U2>(0, 3), Poisoned<U2>(0, 7), 2, 3, 4, 5, 6, 7};
   V8x16 u = shift_sse2_left_scalar(v, 2);
   EXPECT_POISONED(u[0]);
   EXPECT_POISONED(u[1]);
-  EXPECT_NOT_POISONED(u[0] | (~7U));
-  EXPECT_NOT_POISONED(u[1] | (~31U));
+  EXPECT_NOT_POISONED(u[0] | (3U << 2));
+  EXPECT_NOT_POISONED(u[1] | (7U << 2));
   u[0] = u[1] = 0;
   EXPECT_NOT_POISONED(u);
 }
 
 TEST(VectorShiftTest, sse2_left_scalar_by_uninit) {
   V8x16 v = {0, 1, 2, 3, 4, 5, 6, 7};
-  V8x16 u = shift_sse2_left_scalar(v, *GetPoisoned<U4>());
+  V8x16 u = shift_sse2_left_scalar(v, Poisoned<U4>());
   EXPECT_POISONED(u[0]);
   EXPECT_POISONED(u[1]);
   EXPECT_POISONED(u[2]);
@@ -3563,23 +3595,21 @@
 }
 
 TEST(VectorShiftTest, sse2_left) {
-  V8x16 v = {(U2)(*GetPoisoned<U2>() | 3), (U2)(*GetPoisoned<U2>() | 7), 2, 3,
-             4,                            5,                            6, 7};
+  V8x16 v = {Poisoned<U2>(0, 3), Poisoned<U2>(0, 7), 2, 3, 4, 5, 6, 7};
   // Top 64 bits of shift count don't affect the result.
-  V2x64 s = {2, *GetPoisoned<U8>()};
+  V2x64 s = {2, Poisoned<U8>()};
   V8x16 u = shift_sse2_left(v, s);
   EXPECT_POISONED(u[0]);
   EXPECT_POISONED(u[1]);
-  EXPECT_NOT_POISONED(u[0] | (~7U));
-  EXPECT_NOT_POISONED(u[1] | (~31U));
+  EXPECT_NOT_POISONED(u[0] | (3U << 2));
+  EXPECT_NOT_POISONED(u[1] | (7U << 2));
   u[0] = u[1] = 0;
   EXPECT_NOT_POISONED(u);
 }
 
 TEST(VectorShiftTest, sse2_left_by_uninit) {
-  V8x16 v = {(U2)(*GetPoisoned<U2>() | 3), (U2)(*GetPoisoned<U2>() | 7), 2, 3,
-             4,                            5,                            6, 7};
-  V2x64 s = {*GetPoisoned<U8>(), *GetPoisoned<U8>()};
+  V8x16 v = {Poisoned<U2>(0, 3), Poisoned<U2>(0, 7), 2, 3, 4, 5, 6, 7};
+  V2x64 s = {Poisoned<U8>(), Poisoned<U8>()};
   V8x16 u = shift_sse2_left(v, s);
   EXPECT_POISONED(u[0]);
   EXPECT_POISONED(u[1]);
@@ -3598,8 +3628,8 @@
 // This is variable vector shift that's only available starting with AVX2.
 // V4x32 shift_avx2_left(V4x32 x, V4x32 y) {
 TEST(VectorShiftTest, avx2_left) {
-  V4x32 v = {(U2)(*GetPoisoned<U2>() | 3), (U2)(*GetPoisoned<U2>() | 7), 2, 3};
-  V4x32 s = {2, *GetPoisoned<U4>(), 3, *GetPoisoned<U4>()};
+  V4x32 v = {Poisoned<U2>(0, 3), Poisoned<U2>(0, 7), 2, 3};
+  V4x32 s = {2, Poisoned<U4>(), 3, Poisoned<U4>()};
   V4x32 u = shift_avx2_left(v, s);
   EXPECT_POISONED(u[0]);
   EXPECT_NOT_POISONED(u[0] | (~7U));
@@ -3612,6 +3642,75 @@
 #endif // __AVX2__
 } // namespace
 
+TEST(VectorPackTest, sse2_packssdw_128) {
+  const unsigned S2_max = (1 << 15) - 1;  // Largest signed 16-bit value.
+  V4x32 a = {Poisoned<U4>(0, 0xFF0000), Poisoned<U4>(0, 0xFFFF0000),
+             S2_max + 100, 4};
+  V4x32 b = {Poisoned<U4>(0, 0xFF), S2_max + 10000, Poisoned<U4>(0, 0xFF00),
+             S2_max};
+
+  V8x16 c = _mm_packs_epi32(a, b);  // c[0..3] come from a, c[4..7] from b.
+
+  EXPECT_POISONED(c[0]);
+  EXPECT_POISONED(c[1]);
+  EXPECT_NOT_POISONED(c[2]);
+  EXPECT_NOT_POISONED(c[3]);
+  EXPECT_POISONED(c[4]);
+  EXPECT_NOT_POISONED(c[5]);
+  EXPECT_POISONED(c[6]);
+  EXPECT_NOT_POISONED(c[7]);
+
+  EXPECT_EQ(c[2], S2_max);  // a[2] = S2_max + 100, saturated.
+  EXPECT_EQ(c[3], 4);  // a[3], unchanged.
+  EXPECT_EQ(c[5], S2_max);  // b[1] = S2_max + 10000, saturated.
+  EXPECT_EQ(c[7], S2_max);  // b[3], unchanged.
+}
+
+TEST(VectorPackTest, mmx_packuswb) {
+  const unsigned U1_max = (1 << 8) - 1;  // Largest unsigned 8-bit value.
+  V4x16 a = {Poisoned<U2>(0, 0xFF00), Poisoned<U2>(0, 0xF000U), U1_max + 100,
+             4};
+  V4x16 b = {Poisoned<U2>(0, 0xFF), U1_max - 1, Poisoned<U2>(0, 0xF), U1_max};
+  V8x8 c = _mm_packs_pu16(a, b);  // c[0..3] come from a, c[4..7] from b.
+
+  EXPECT_POISONED(c[0]);
+  EXPECT_POISONED(c[1]);
+  EXPECT_NOT_POISONED(c[2]);
+  EXPECT_NOT_POISONED(c[3]);
+  EXPECT_POISONED(c[4]);
+  EXPECT_NOT_POISONED(c[5]);
+  EXPECT_POISONED(c[6]);
+  EXPECT_NOT_POISONED(c[7]);
+
+  EXPECT_EQ(c[2], U1_max);  // a[2] = U1_max + 100, saturated.
+  EXPECT_EQ(c[3], 4);  // a[3], unchanged.
+  EXPECT_EQ(c[5], U1_max - 1);  // b[1], unchanged.
+  EXPECT_EQ(c[7], U1_max);  // b[3], unchanged.
+}
+
+TEST(VectorSadTest, sse2_psad_bw) {
+  V16x8 a = {Poisoned<U1>(), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+  V16x8 b = {100, 101, 102, 103, 104, 105, 106, 107,
+             108, 109, 110, 111, 112, 113, 114, 115};
+  V2x64 c = _mm_sad_epu8(a, b);  // One sum per 8-byte half.
+
+  EXPECT_POISONED(c[0]);  // The half that contains the poisoned a[0].
+  EXPECT_NOT_POISONED(c[1]);
+
+  EXPECT_EQ(800U, c[1]);  // 8 bytes, each differing by 100.
+}
+
+TEST(VectorMaddTest, mmx_pmadd_wd) {
+  V4x16 a = {Poisoned<U2>(), 1, 2, 3};
+  V4x16 b = {100, 101, 102, 103};
+  V2x32 c = _mm_madd_pi16(a, b);  // c[k] = a[2k]*b[2k] + a[2k+1]*b[2k+1].
+
+  EXPECT_POISONED(c[0]);  // Depends on the poisoned a[0].
+  EXPECT_NOT_POISONED(c[1]);
+
+  EXPECT_EQ((unsigned)(2 * 102 + 3 * 103), c[1]);
+}
+#endif  // defined(__clang__)
 
 TEST(MemorySanitizerDr, StoreInDSOTest) {
   if (!__msan_has_dynamic_component()) return;
@@ -3970,7 +4069,7 @@
 TEST(MemorySanitizerAllocator, get_estimated_allocated_size) {
   size_t sizes[] = {0, 20, 5000, 1<<20};
   for (size_t i = 0; i < sizeof(sizes) / sizeof(*sizes); ++i) {
-    size_t alloc_size = __msan_get_estimated_allocated_size(sizes[i]);
+    size_t alloc_size = __sanitizer_get_estimated_allocated_size(sizes[i]);
     EXPECT_EQ(alloc_size, sizes[i]);
   }
 }
@@ -3979,26 +4078,26 @@
   char *array = reinterpret_cast<char*>(malloc(100));
   int *int_ptr = new int;
 
-  EXPECT_TRUE(__msan_get_ownership(array));
-  EXPECT_EQ(100U, __msan_get_allocated_size(array));
+  EXPECT_TRUE(__sanitizer_get_ownership(array));
+  EXPECT_EQ(100U, __sanitizer_get_allocated_size(array));
 
-  EXPECT_TRUE(__msan_get_ownership(int_ptr));
-  EXPECT_EQ(sizeof(*int_ptr), __msan_get_allocated_size(int_ptr));
+  EXPECT_TRUE(__sanitizer_get_ownership(int_ptr));
+  EXPECT_EQ(sizeof(*int_ptr), __sanitizer_get_allocated_size(int_ptr));
 
   void *wild_addr = reinterpret_cast<void*>(0x1);
-  EXPECT_FALSE(__msan_get_ownership(wild_addr));
-  EXPECT_EQ(0U, __msan_get_allocated_size(wild_addr));
+  EXPECT_FALSE(__sanitizer_get_ownership(wild_addr));
+  EXPECT_EQ(0U, __sanitizer_get_allocated_size(wild_addr));
 
-  EXPECT_FALSE(__msan_get_ownership(array + 50));
-  EXPECT_EQ(0U, __msan_get_allocated_size(array + 50));
+  EXPECT_FALSE(__sanitizer_get_ownership(array + 50));
+  EXPECT_EQ(0U, __sanitizer_get_allocated_size(array + 50));
 
-  // NULL is a valid argument for GetAllocatedSize but is not owned.                                                  
-  EXPECT_FALSE(__msan_get_ownership(NULL));
-  EXPECT_EQ(0U, __msan_get_allocated_size(NULL));
- 
+  // NULL is a valid argument for GetAllocatedSize but is not owned.
+  EXPECT_FALSE(__sanitizer_get_ownership(NULL));
+  EXPECT_EQ(0U, __sanitizer_get_allocated_size(NULL));
+
   free(array);
-  EXPECT_FALSE(__msan_get_ownership(array));
-  EXPECT_EQ(0U, __msan_get_allocated_size(array));
+  EXPECT_FALSE(__sanitizer_get_ownership(array));
+  EXPECT_EQ(0U, __sanitizer_get_allocated_size(array));
 
   delete int_ptr;
 }
diff --git a/lib/profile/InstrProfilingFile.c b/lib/profile/InstrProfilingFile.c
index 2f77e31..5fb78e3 100644
--- a/lib/profile/InstrProfilingFile.c
+++ b/lib/profile/InstrProfilingFile.c
@@ -8,7 +8,6 @@
 \*===----------------------------------------------------------------------===*/
 
 #include "InstrProfiling.h"
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/lib/sanitizer_common/CMakeLists.txt b/lib/sanitizer_common/CMakeLists.txt
index 19c931a..7ad3f31 100644
--- a/lib/sanitizer_common/CMakeLists.txt
+++ b/lib/sanitizer_common/CMakeLists.txt
@@ -45,6 +45,7 @@
 set(SANITIZER_HEADERS
   sanitizer_addrhashmap.h
   sanitizer_allocator.h
+  sanitizer_allocator_interface.h
   sanitizer_allocator_internal.h
   sanitizer_atomic.h
   sanitizer_atomic_clang.h
diff --git a/lib/sanitizer_common/sanitizer_allocator.h b/lib/sanitizer_common/sanitizer_allocator.h
index a8debd9..c83c672 100644
--- a/lib/sanitizer_common/sanitizer_allocator.h
+++ b/lib/sanitizer_common/sanitizer_allocator.h
@@ -198,14 +198,12 @@
 
 // Memory allocator statistics
 enum AllocatorStat {
-  AllocatorStatMalloced,
-  AllocatorStatFreed,
-  AllocatorStatMmapped,
-  AllocatorStatUnmapped,
+  AllocatorStatAllocated,
+  AllocatorStatMapped,
   AllocatorStatCount
 };
 
-typedef u64 AllocatorStatCounters[AllocatorStatCount];
+typedef uptr AllocatorStatCounters[AllocatorStatCount];
 
 // Per-thread stats, live in per-thread cache.
 class AllocatorStats {
@@ -214,16 +212,21 @@
     internal_memset(this, 0, sizeof(*this));
   }
 
-  void Add(AllocatorStat i, u64 v) {
+  void Add(AllocatorStat i, uptr v) {  // Adds |v| to counter |i|.
     v += atomic_load(&stats_[i], memory_order_relaxed);
     atomic_store(&stats_[i], v, memory_order_relaxed);
   }
 
-  void Set(AllocatorStat i, u64 v) {
+  void Sub(AllocatorStat i, uptr v) {  // Subtracts |v| from counter |i|.
+    v = atomic_load(&stats_[i], memory_order_relaxed) - v;
     atomic_store(&stats_[i], v, memory_order_relaxed);
   }
 
-  u64 Get(AllocatorStat i) const {
+  void Set(AllocatorStat i, uptr v) {  // Overwrites counter |i| with |v|.
+    atomic_store(&stats_[i], v, memory_order_relaxed);
+  }
+
+  uptr Get(AllocatorStat i) const {  // Returns counter |i|.
     return atomic_load(&stats_[i], memory_order_relaxed);
   }
 
@@ -231,7 +234,7 @@
   friend class AllocatorGlobalStats;
   AllocatorStats *next_;
   AllocatorStats *prev_;
-  atomic_uint64_t stats_[AllocatorStatCount];
+  atomic_uintptr_t stats_[AllocatorStatCount];
 };
 
 // Global stats, used for aggregation and querying.
@@ -260,7 +263,7 @@
   }
 
   void Get(AllocatorStatCounters s) const {
-    internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
+    internal_memset(s, 0, AllocatorStatCount * sizeof(uptr));
     SpinMutexLock l(&mu_);
     const AllocatorStats *stats = this;
     for (;;) {
@@ -270,6 +273,9 @@
       if (stats == this)
         break;
     }
+    // All stats must be non-negative.
+    for (int i = 0; i < AllocatorStatCount; i++)
+      s[i] = ((sptr)s[i]) >= 0 ? s[i] : 0;
   }
 
  private:
@@ -522,7 +528,7 @@
         map_size += kUserMapSize;
       CHECK_GE(region->mapped_user + map_size, end_idx);
       MapWithCallback(region_beg + region->mapped_user, map_size);
-      stat->Add(AllocatorStatMmapped, map_size);
+      stat->Add(AllocatorStatMapped, map_size);
       region->mapped_user += map_size;
     }
     uptr total_count = (region->mapped_user - beg_idx - size)
@@ -841,7 +847,7 @@
     uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                       "SizeClassAllocator32"));
     MapUnmapCallback().OnMap(res, kRegionSize);
-    stat->Add(AllocatorStatMmapped, kRegionSize);
+    stat->Add(AllocatorStatMapped, kRegionSize);
     CHECK_EQ(0U, (res & (kRegionSize - 1)));
     possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
     return res;
@@ -907,7 +913,7 @@
   void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
     CHECK_NE(class_id, 0UL);
     CHECK_LT(class_id, kNumClasses);
-    stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
+    stats_.Add(AllocatorStatAllocated, SizeClassMap::Size(class_id));
     PerClass *c = &per_class_[class_id];
     if (UNLIKELY(c->count == 0))
       Refill(allocator, class_id);
@@ -922,7 +928,7 @@
     // If the first allocator call on a new thread is a deallocation, then
     // max_count will be zero, leading to check failure.
     InitCache();
-    stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
+    stats_.Sub(AllocatorStatAllocated, SizeClassMap::Size(class_id));
     PerClass *c = &per_class_[class_id];
     CHECK_NE(c->max_count, 0UL);
     if (UNLIKELY(c->count == c->max_count))
@@ -1033,8 +1039,8 @@
       stats.currently_allocated += map_size;
       stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
       stats.by_size_log[size_log]++;
-      stat->Add(AllocatorStatMalloced, map_size);
-      stat->Add(AllocatorStatMmapped, map_size);
+      stat->Add(AllocatorStatAllocated, map_size);
+      stat->Add(AllocatorStatMapped, map_size);
     }
     return reinterpret_cast<void*>(res);
   }
@@ -1052,8 +1058,8 @@
       chunks_sorted_ = false;
       stats.n_frees++;
       stats.currently_allocated -= h->map_size;
-      stat->Add(AllocatorStatFreed, h->map_size);
-      stat->Add(AllocatorStatUnmapped, h->map_size);
+      stat->Sub(AllocatorStatAllocated, h->map_size);
+      stat->Sub(AllocatorStatMapped, h->map_size);
     }
     MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
     UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
diff --git a/lib/sanitizer_common/sanitizer_allocator_interface.h b/lib/sanitizer_common/sanitizer_allocator_interface.h
new file mode 100644
index 0000000..2cd924c
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_allocator_interface.h
@@ -0,0 +1,38 @@
+//===-- sanitizer_allocator_interface.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Re-declaration of functions from the public sanitizer allocator interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_ALLOCATOR_INTERFACE_H
+#define SANITIZER_ALLOCATOR_INTERFACE_H
+
+#include "sanitizer_internal_defs.h"
+
+using __sanitizer::uptr;
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+uptr __sanitizer_get_estimated_allocated_size(uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE int __sanitizer_get_ownership(const void *p);
+SANITIZER_INTERFACE_ATTRIBUTE uptr
+__sanitizer_get_allocated_size(const void *p);
+SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_current_allocated_bytes();
+SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_heap_size();
+SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_free_bytes();
+SANITIZER_INTERFACE_ATTRIBUTE uptr __sanitizer_get_unmapped_bytes();
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+    /* OPTIONAL */ void __sanitizer_malloc_hook(void *ptr, uptr size);
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE
+    /* OPTIONAL */ void __sanitizer_free_hook(void *ptr);
+}  // extern "C"
+
+#endif  // SANITIZER_ALLOCATOR_INTERFACE_H
diff --git a/lib/sanitizer_common/sanitizer_atomic_msvc.h b/lib/sanitizer_common/sanitizer_atomic_msvc.h
index bff5593..12ffef3 100644
--- a/lib/sanitizer_common/sanitizer_atomic_msvc.h
+++ b/lib/sanitizer_common/sanitizer_atomic_msvc.h
@@ -33,33 +33,20 @@
     long long volatile *Destination,              // NOLINT
     long long Exchange, long long Comparand);     // NOLINT
 #pragma intrinsic(_InterlockedCompareExchange64)
-
-#ifdef _WIN64
-extern "C" long long _InterlockedExchangeAdd64(     // NOLINT
-    long long volatile * Addend, long long Value);  // NOLINT
-#pragma intrinsic(_InterlockedExchangeAdd64)
 extern "C" void *_InterlockedCompareExchangePointer(
     void *volatile *Destination,
     void *Exchange, void *Comparand);
 #pragma intrinsic(_InterlockedCompareExchangePointer)
-#else
-// There's no _InterlockedCompareExchangePointer intrinsic on x86,
-// so call _InterlockedCompareExchange instead.
 extern "C"
 long __cdecl _InterlockedCompareExchange(  // NOLINT
     long volatile *Destination,            // NOLINT
     long Exchange, long Comparand);        // NOLINT
 #pragma intrinsic(_InterlockedCompareExchange)
 
-inline static void *_InterlockedCompareExchangePointer(
-    void *volatile *Destination,
-    void *Exchange, void *Comparand) {
-  return reinterpret_cast<void*>(
-      _InterlockedCompareExchange(
-          reinterpret_cast<long volatile*>(Destination),  // NOLINT
-          reinterpret_cast<long>(Exchange),               // NOLINT
-          reinterpret_cast<long>(Comparand)));            // NOLINT
-}
+#ifdef _WIN64
+extern "C" long long _InterlockedExchangeAdd64(     // NOLINT
+    long long volatile * Addend, long long Value);  // NOLINT
+#pragma intrinsic(_InterlockedExchangeAdd64)
 #endif
 
 namespace __sanitizer {
diff --git a/lib/sanitizer_common/sanitizer_common.cc b/lib/sanitizer_common/sanitizer_common.cc
index 05bd876..6b76714 100644
--- a/lib/sanitizer_common/sanitizer_common.cc
+++ b/lib/sanitizer_common/sanitizer_common.cc
@@ -218,10 +218,11 @@
   n_ranges_ = 0;
 }
 
-void LoadedModule::addAddressRange(uptr beg, uptr end) {
+void LoadedModule::addAddressRange(uptr beg, uptr end, bool executable) {
   CHECK_LT(n_ranges_, kMaxNumberOfAddressRanges);
   ranges_[n_ranges_].beg = beg;
   ranges_[n_ranges_].end = end;
+  exec_[n_ranges_] = executable;  // Stored at the same index as the range.
   n_ranges_++;
 }
 
diff --git a/lib/sanitizer_common/sanitizer_common.h b/lib/sanitizer_common/sanitizer_common.h
index 44f56ff..6dc9500 100644
--- a/lib/sanitizer_common/sanitizer_common.h
+++ b/lib/sanitizer_common/sanitizer_common.h
@@ -28,7 +28,11 @@
 const uptr kWordSize = SANITIZER_WORDSIZE / 8;
 const uptr kWordSizeInBits = 8 * kWordSize;
 
-const uptr kCacheLineSize = 64;
+#if defined(__powerpc__) || defined(__powerpc64__)
+  const uptr kCacheLineSize = 128;
+#else
+  const uptr kCacheLineSize = 64;
+#endif
 
 const uptr kMaxPathLength = 512;
 
@@ -188,7 +192,9 @@
 void CovPrepareForSandboxing(__sanitizer_sandbox_arguments *args);
 void SetSandboxingCallback(void (*f)());
 
-void CovUpdateMapping();
+void CovUpdateMapping(uptr caller_pc = 0);
+void CovBeforeFork();
+void CovAfterFork(int child_pid);
 
 void InitTlsSize();
 uptr GetTlsSize();
@@ -476,7 +482,7 @@
 class LoadedModule {
  public:
   LoadedModule(const char *module_name, uptr base_address);
-  void addAddressRange(uptr beg, uptr end);
+  void addAddressRange(uptr beg, uptr end, bool executable);
   bool containsAddress(uptr address) const;
 
   const char *full_name() const { return full_name_; }
@@ -485,6 +491,7 @@
   uptr n_ranges() const { return n_ranges_; }
   uptr address_range_start(int i) const { return ranges_[i].beg; }
   uptr address_range_end(int i) const { return ranges_[i].end; }
+  bool address_range_executable(int i) const { return exec_[i]; }
 
  private:
   struct AddressRange {
@@ -495,6 +502,7 @@
   uptr base_address_;
   static const uptr kMaxNumberOfAddressRanges = 6;
   AddressRange ranges_[kMaxNumberOfAddressRanges];
+  bool exec_[kMaxNumberOfAddressRanges];
   uptr n_ranges_;
 };
 
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors.inc b/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 4296ec0..64ddeed 100644
--- a/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -13,6 +13,7 @@
 // This file should be included into the tool's interceptor file,
 // which has to define it's own macros:
 //   COMMON_INTERCEPTOR_ENTER
+//   COMMON_INTERCEPTOR_ENTER_NOIGNORE
 //   COMMON_INTERCEPTOR_READ_RANGE
 //   COMMON_INTERCEPTOR_WRITE_RANGE
 //   COMMON_INTERCEPTOR_INITIALIZE_RANGE
@@ -270,8 +271,9 @@
 INTERCEPTOR(double, frexp, double x, int *exp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, frexp, x, exp);
-  double res = REAL(frexp)(x, exp);
+  // Report the write to |exp| up front; frexp() is assumed to always write it.
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
+  double res = REAL(frexp)(x, exp);
   return res;
 }
 
@@ -284,6 +286,9 @@
 INTERCEPTOR(float, frexpf, float x, int *exp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, frexpf, x, exp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   float res = REAL(frexpf)(x, exp);
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
   return res;
@@ -292,6 +297,9 @@
 INTERCEPTOR(long double, frexpl, long double x, int *exp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, frexpl, x, exp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   long double res = REAL(frexpl)(x, exp);
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
   return res;
@@ -330,6 +338,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, read, fd, ptr, count);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(read)(fd, ptr, count);
   if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
@@ -345,6 +356,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pread, fd, ptr, count, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(pread)(fd, ptr, count, offset);
   if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
@@ -360,6 +374,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pread64, fd, ptr, count, offset);
   COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(pread64)(fd, ptr, count, offset);
   if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
   if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
@@ -538,9 +555,11 @@
 INTERCEPTOR(unsigned long, time, unsigned long *t) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, time, t);
-  unsigned long res = REAL(time)(t);
+  unsigned long local_t;  // REAL(time) writes here instead of to |t|.
+  unsigned long res = REAL(time)(&local_t);
   if (t && res != (unsigned long)-1) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, t, sizeof(*t));
+    *t = local_t;  // Copy out only after the write to |t| was reported.
   }
   return res;
 }
@@ -602,6 +621,9 @@
 INTERCEPTOR(char *, ctime, unsigned long *timep) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ctime, timep);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(ctime)(timep);
   if (res) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
@@ -612,6 +634,9 @@
 INTERCEPTOR(char *, ctime_r, unsigned long *timep, char *result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ctime_r, timep, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(ctime_r)(timep, result);
   if (res) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
@@ -622,6 +647,9 @@
 INTERCEPTOR(char *, asctime, __sanitizer_tm *tm) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, asctime, tm);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(asctime)(tm);
   if (res) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
@@ -632,6 +660,9 @@
 INTERCEPTOR(char *, asctime_r, __sanitizer_tm *tm, char *result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, asctime_r, tm, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(asctime_r)(tm, result);
   if (res) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
@@ -673,6 +704,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, strptime, s, format, tm);
   if (format)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, format, REAL(strlen)(format) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(strptime)(s, format, tm);
   if (res) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, s, res - s);
@@ -807,6 +841,9 @@
     return res;                                                                \
   }
 
+// FIXME: under ASan the REAL() call below may write to freed memory and
+// corrupt its metadata. See
+// https://code.google.com/p/address-sanitizer/issues/detail?id=321.
 #define VSPRINTF_INTERCEPTOR_IMPL(vname, str, ...)                             \
   {                                                                            \
     VPRINTF_INTERCEPTOR_ENTER(vname, str, __VA_ARGS__)                         \
@@ -821,6 +858,9 @@
     return res;                                                                \
   }
 
+// FIXME: under ASan the REAL() call below may write to freed memory and
+// corrupt its metadata. See
+// https://code.google.com/p/address-sanitizer/issues/detail?id=321.
 #define VSNPRINTF_INTERCEPTOR_IMPL(vname, str, size, ...)                      \
   {                                                                            \
     VPRINTF_INTERCEPTOR_ENTER(vname, str, size, __VA_ARGS__)                   \
@@ -835,6 +875,9 @@
     return res;                                                                \
   }
 
+// FIXME: under ASan the REAL() call below may write to freed memory and
+// corrupt its metadata. See
+// https://code.google.com/p/address-sanitizer/issues/detail?id=321.
 #define VASPRINTF_INTERCEPTOR_IMPL(vname, strp, ...)                           \
   {                                                                            \
     VPRINTF_INTERCEPTOR_ENTER(vname, strp, __VA_ARGS__)                        \
@@ -1082,6 +1125,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getpwnam_r, name, pwd, buf, buflen, result);
   COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getpwnam_r)(name, pwd, buf, buflen, result);
   if (!res) {
     if (result && *result) unpoison_passwd(ctx, *result);
@@ -1094,6 +1140,9 @@
             SIZE_T buflen, __sanitizer_passwd **result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getpwuid_r, uid, pwd, buf, buflen, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getpwuid_r)(uid, pwd, buf, buflen, result);
   if (!res) {
     if (result && *result) unpoison_passwd(ctx, *result);
@@ -1107,6 +1156,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getgrnam_r, name, grp, buf, buflen, result);
   COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getgrnam_r)(name, grp, buf, buflen, result);
   if (!res) {
     if (result && *result) unpoison_group(ctx, *result);
@@ -1119,6 +1171,9 @@
             SIZE_T buflen, __sanitizer_group **result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getgrgid_r, gid, grp, buf, buflen, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getgrgid_r)(gid, grp, buf, buflen, result);
   if (!res) {
     if (result && *result) unpoison_group(ctx, *result);
@@ -1185,6 +1240,9 @@
             SIZE_T buflen, __sanitizer_passwd **pwbufp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getpwent_r, pwbuf, buf, buflen, pwbufp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getpwent_r)(pwbuf, buf, buflen, pwbufp);
   if (!res) {
     if (pwbufp && *pwbufp) unpoison_passwd(ctx, *pwbufp);
@@ -1197,6 +1255,9 @@
             SIZE_T buflen, __sanitizer_passwd **pwbufp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fgetpwent_r, fp, pwbuf, buf, buflen, pwbufp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(fgetpwent_r)(fp, pwbuf, buf, buflen, pwbufp);
   if (!res) {
     if (pwbufp && *pwbufp) unpoison_passwd(ctx, *pwbufp);
@@ -1209,6 +1270,9 @@
             __sanitizer_group **pwbufp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getgrent_r, pwbuf, buf, buflen, pwbufp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getgrent_r)(pwbuf, buf, buflen, pwbufp);
   if (!res) {
     if (pwbufp && *pwbufp) unpoison_group(ctx, *pwbufp);
@@ -1221,6 +1285,9 @@
             SIZE_T buflen, __sanitizer_group **pwbufp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fgetgrent_r, fp, pwbuf, buf, buflen, pwbufp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(fgetgrent_r)(fp, pwbuf, buf, buflen, pwbufp);
   if (!res) {
     if (pwbufp && *pwbufp) unpoison_group(ctx, *pwbufp);
@@ -1275,6 +1342,9 @@
 INTERCEPTOR(int, clock_getres, u32 clk_id, void *tp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, clock_getres, clk_id, tp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(clock_getres)(clk_id, tp);
   if (!res && tp) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, struct_timespec_sz);
@@ -1284,6 +1354,9 @@
 INTERCEPTOR(int, clock_gettime, u32 clk_id, void *tp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, clock_gettime, clk_id, tp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(clock_gettime)(clk_id, tp);
   if (!res) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, struct_timespec_sz);
@@ -1308,6 +1381,9 @@
 INTERCEPTOR(int, getitimer, int which, void *curr_value) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getitimer, which, curr_value);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getitimer)(which, curr_value);
   if (!res && curr_value) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, curr_value, struct_itimerval_sz);
@@ -1319,6 +1395,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, setitimer, which, new_value, old_value);
   if (new_value)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, new_value, struct_itimerval_sz);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(setitimer)(which, new_value, old_value);
   if (!res && old_value) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, old_value, struct_itimerval_sz);
@@ -1448,6 +1527,9 @@
 INTERCEPTOR_WITH_SUFFIX(int, wait, int *status) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, wait, status);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(wait)(status);
   if (res != -1 && status)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
@@ -1457,6 +1539,9 @@
                         int options) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, waitid, idtype, id, infop, options);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(waitid)(idtype, id, infop, options);
   if (res != -1 && infop)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, infop, siginfo_t_sz);
@@ -1465,6 +1550,9 @@
 INTERCEPTOR_WITH_SUFFIX(int, waitpid, int pid, int *status, int options) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, waitpid, pid, status, options);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(waitpid)(pid, status, options);
   if (res != -1 && status)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
@@ -1473,6 +1561,9 @@
 INTERCEPTOR(int, wait3, int *status, int options, void *rusage) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, wait3, status, options, rusage);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(wait3)(status, options, rusage);
   if (res != -1) {
     if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
@@ -1484,6 +1575,9 @@
 INTERCEPTOR(int, __wait4, int pid, int *status, int options, void *rusage) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, __wait4, pid, status, options, rusage);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(__wait4)(pid, status, options, rusage);
   if (res != -1) {
     if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
@@ -1496,6 +1590,9 @@
 INTERCEPTOR(int, wait4, int pid, int *status, int options, void *rusage) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, wait4, pid, status, options, rusage);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(wait4)(pid, status, options, rusage);
   if (res != -1) {
     if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
@@ -1522,6 +1619,9 @@
   uptr sz = __sanitizer_in_addr_sz(af);
   if (sz) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sz);
   // FIXME: figure out read size based on the address family.
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(inet_ntop)(af, src, dst, size);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
   return res;
@@ -1530,6 +1630,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, inet_pton, af, src, dst);
   // FIXME: figure out read size based on the address family.
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(inet_pton)(af, src, dst);
   if (res == 1) {
     uptr sz = __sanitizer_in_addr_sz(af);
@@ -1549,6 +1652,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, inet_aton, cp, dst);
   if (cp) COMMON_INTERCEPTOR_READ_RANGE(ctx, cp, REAL(strlen)(cp) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(inet_aton)(cp, dst);
   if (res != 0) {
     uptr sz = __sanitizer_in_addr_sz(af_inet);
@@ -1565,6 +1671,9 @@
 INTERCEPTOR(int, pthread_getschedparam, uptr thread, int *policy, int *param) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pthread_getschedparam, thread, policy, param);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(pthread_getschedparam)(thread, policy, param);
   if (res == 0) {
     if (policy) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, policy, sizeof(*policy));
@@ -1589,6 +1698,9 @@
     COMMON_INTERCEPTOR_READ_RANGE(ctx, service, REAL(strlen)(service) + 1);
   if (hints)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, hints, sizeof(__sanitizer_addrinfo));
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getaddrinfo)(node, service, hints, out);
   if (res == 0 && out) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, out, sizeof(*out));
@@ -1618,6 +1730,9 @@
                            serv, servlen, flags);
   // FIXME: consider adding READ_RANGE(sockaddr, salen)
   // There is padding in in_addr that may make this too noisy
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res =
       REAL(getnameinfo)(sockaddr, salen, host, hostlen, serv, servlen, flags);
   if (res == 0) {
@@ -1639,6 +1754,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, getsockname, sock_fd, addr, addrlen);
   COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
   int addrlen_in = *addrlen;
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getsockname)(sock_fd, addr, addrlen);
   if (res == 0) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addrlen_in, *addrlen));
@@ -1716,11 +1834,38 @@
 #endif
 
 #if SANITIZER_INTERCEPT_GETHOSTBYNAME_R
+INTERCEPTOR(int, gethostbyname_r, char *name, struct __sanitizer_hostent *ret,
+            char *buf, SIZE_T buflen, __sanitizer_hostent **result,
+            int *h_errnop) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname_r, name, ret, buf, buflen, result,
+                           h_errnop);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
+  int res = REAL(gethostbyname_r)(name, ret, buf, buflen, result, h_errnop);
+  if (result) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
+    if (res == 0 && *result) write_hostent(ctx, *result);
+  }
+  if (h_errnop)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
+  return res;
+}
+#define INIT_GETHOSTBYNAME_R COMMON_INTERCEPT_FUNCTION(gethostbyname_r);
+#else
+#define INIT_GETHOSTBYNAME_R
+#endif
+
+#if SANITIZER_INTERCEPT_GETHOSTENT_R
 INTERCEPTOR(int, gethostent_r, struct __sanitizer_hostent *ret, char *buf,
             SIZE_T buflen, __sanitizer_hostent **result, int *h_errnop) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, gethostent_r, ret, buf, buflen, result,
                            h_errnop);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(gethostent_r)(ret, buf, buflen, result, h_errnop);
   if (result) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
@@ -1730,7 +1875,13 @@
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
   return res;
 }
+#define INIT_GETHOSTENT_R                  \
+  COMMON_INTERCEPT_FUNCTION(gethostent_r);
+#else
+#define INIT_GETHOSTENT_R
+#endif
 
+#if SANITIZER_INTERCEPT_GETHOSTBYADDR_R
 INTERCEPTOR(int, gethostbyaddr_r, void *addr, int len, int type,
             struct __sanitizer_hostent *ret, char *buf, SIZE_T buflen,
             __sanitizer_hostent **result, int *h_errnop) {
@@ -1738,6 +1889,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, gethostbyaddr_r, addr, len, type, ret, buf,
                            buflen, result, h_errnop);
   COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, len);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(gethostbyaddr_r)(addr, len, type, ret, buf, buflen, result,
                                   h_errnop);
   if (result) {
@@ -1748,29 +1902,22 @@
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
   return res;
 }
+#define INIT_GETHOSTBYADDR_R                  \
+  COMMON_INTERCEPT_FUNCTION(gethostbyaddr_r);
+#else
+#define INIT_GETHOSTBYADDR_R
+#endif
 
-INTERCEPTOR(int, gethostbyname_r, char *name, struct __sanitizer_hostent *ret,
-            char *buf, SIZE_T buflen, __sanitizer_hostent **result,
-            int *h_errnop) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname_r, name, ret, buf, buflen, result,
-                           h_errnop);
-  int res = REAL(gethostbyname_r)(name, ret, buf, buflen, result, h_errnop);
-  if (result) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
-    if (res == 0 && *result) write_hostent(ctx, *result);
-  }
-  if (h_errnop)
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
-  return res;
-}
-
+#if SANITIZER_INTERCEPT_GETHOSTBYNAME2_R
 INTERCEPTOR(int, gethostbyname2_r, char *name, int af,
             struct __sanitizer_hostent *ret, char *buf, SIZE_T buflen,
             __sanitizer_hostent **result, int *h_errnop) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname2_r, name, af, ret, buf, buflen,
                            result, h_errnop);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res =
       REAL(gethostbyname2_r)(name, af, ret, buf, buflen, result, h_errnop);
   if (result) {
@@ -1781,13 +1928,10 @@
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
   return res;
 }
-#define INIT_GETHOSTBYNAME_R                  \
-  COMMON_INTERCEPT_FUNCTION(gethostent_r);    \
-  COMMON_INTERCEPT_FUNCTION(gethostbyaddr_r); \
-  COMMON_INTERCEPT_FUNCTION(gethostbyname_r); \
+#define INIT_GETHOSTBYNAME2_R                  \
   COMMON_INTERCEPT_FUNCTION(gethostbyname2_r);
 #else
-#define INIT_GETHOSTBYNAME_R
+#define INIT_GETHOSTBYNAME2_R
 #endif
 
 #if SANITIZER_INTERCEPT_GETSOCKOPT
@@ -1797,6 +1941,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, getsockopt, sockfd, level, optname, optval,
                            optlen);
   if (optlen) COMMON_INTERCEPTOR_READ_RANGE(ctx, optlen, sizeof(*optlen));
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getsockopt)(sockfd, level, optname, optval, optlen);
   if (res == 0)
     if (optval && optlen) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, optval, *optlen);
@@ -1811,7 +1958,7 @@
 INTERCEPTOR(int, accept, int fd, void *addr, unsigned *addrlen) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, accept, fd, addr, addrlen);
-  unsigned addrlen0;
+  unsigned addrlen0 = 0;
   if (addrlen) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
     addrlen0 = *addrlen;
@@ -1833,11 +1980,14 @@
 INTERCEPTOR(int, accept4, int fd, void *addr, unsigned *addrlen, int f) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, accept4, fd, addr, addrlen, f);
-  unsigned addrlen0;
+  unsigned addrlen0 = 0;
   if (addrlen) {
     COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
     addrlen0 = *addrlen;
   }
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int fd2 = REAL(accept4)(fd, addr, addrlen, f);
   if (fd2 >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
@@ -1855,6 +2005,9 @@
 INTERCEPTOR(double, modf, double x, double *iptr) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, modf, x, iptr);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   double res = REAL(modf)(x, iptr);
   if (iptr) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
@@ -1864,6 +2017,9 @@
 INTERCEPTOR(float, modff, float x, float *iptr) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, modff, x, iptr);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   float res = REAL(modff)(x, iptr);
   if (iptr) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
@@ -1873,6 +2029,9 @@
 INTERCEPTOR(long double, modfl, long double x, long double *iptr) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, modfl, x, iptr);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   long double res = REAL(modfl)(x, iptr);
   if (iptr) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
@@ -1905,6 +2064,9 @@
             int flags) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, recvmsg, fd, msg, flags);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(recvmsg)(fd, msg, flags);
   if (res >= 0) {
     if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
@@ -1926,6 +2088,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, getpeername, sockfd, addr, addrlen);
   unsigned addr_sz;
   if (addrlen) addr_sz = *addrlen;
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getpeername)(sockfd, addr, addrlen);
   if (!res && addr && addrlen)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
@@ -1939,6 +2104,9 @@
 #if SANITIZER_INTERCEPT_SYSINFO
 INTERCEPTOR(int, sysinfo, void *info) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sysinfo, info);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sysinfo)(info);
   if (!res && info)
@@ -1954,6 +2122,9 @@
 INTERCEPTOR(__sanitizer_dirent *, readdir, void *dirp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, readdir, dirp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   __sanitizer_dirent *res = REAL(readdir)(dirp);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, res->d_reclen);
   return res;
@@ -1963,6 +2134,9 @@
             __sanitizer_dirent **result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, readdir_r, dirp, entry, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(readdir_r)(dirp, entry, result);
   if (!res) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
@@ -1983,6 +2157,9 @@
 INTERCEPTOR(__sanitizer_dirent64 *, readdir64, void *dirp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, readdir64, dirp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   __sanitizer_dirent64 *res = REAL(readdir64)(dirp);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, res->d_reclen);
   return res;
@@ -1992,6 +2169,9 @@
             __sanitizer_dirent64 **result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, readdir64_r, dirp, entry, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(readdir64_r)(dirp, entry, result);
   if (!res) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
@@ -2027,10 +2207,13 @@
     }
   }
 
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   uptr res = REAL(ptrace)(request, pid, addr, data);
 
   if (!res && data) {
-    // Note that PEEK* requests assing different meaning to the return value.
+    // Note that PEEK* requests assign different meaning to the return value.
     // This function does not handle them (nor does it need to).
     if (request == ptrace_getregs)
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_regs_struct_sz);
@@ -2040,6 +2223,8 @@
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
     else if (request == ptrace_getsiginfo)
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, siginfo_t_sz);
+    else if (request == ptrace_geteventmsg)
+      COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(unsigned long));
     else if (request == ptrace_getregset) {
       __sanitizer_iovec *iov = (__sanitizer_iovec *)data;
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iov->iov_base, iov->iov_len);
@@ -2073,6 +2258,9 @@
 INTERCEPTOR(char *, getcwd, char *buf, SIZE_T size) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getcwd, buf, size);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(getcwd)(buf, size);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
   return res;
@@ -2086,6 +2274,9 @@
 INTERCEPTOR(char *, get_current_dir_name, int fake) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, get_current_dir_name, fake);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(get_current_dir_name)(fake);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
   return res;
@@ -2101,6 +2292,9 @@
 INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   INTMAX_T res = REAL(strtoimax)(nptr, endptr, base);
   if (endptr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, endptr, sizeof(*endptr));
   return res;
@@ -2109,6 +2303,9 @@
 INTERCEPTOR(INTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   INTMAX_T res = REAL(strtoumax)(nptr, endptr, base);
   if (endptr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, endptr, sizeof(*endptr));
   return res;
@@ -2125,6 +2322,9 @@
 INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, mbstowcs, dest, src, len);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(mbstowcs)(dest, src, len);
   if (res != (SIZE_T) - 1 && dest) {
     SIZE_T write_cnt = res + (res < len);
@@ -2139,6 +2339,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, mbsrtowcs, dest, src, len, ps);
   if (src) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
   if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(mbsrtowcs)(dest, src, len, ps);
   if (res != (SIZE_T)(-1) && dest && src) {
     // This function, and several others, may or may not write the terminating
@@ -2166,6 +2369,9 @@
     if (nms) COMMON_INTERCEPTOR_READ_RANGE(ctx, *src, nms);
   }
   if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(mbsnrtowcs)(dest, src, nms, len, ps);
   if (res != (SIZE_T)(-1) && dest && src) {
     SIZE_T write_cnt = res + !*src;
@@ -2183,6 +2389,9 @@
 INTERCEPTOR(SIZE_T, wcstombs, char *dest, const wchar_t *src, SIZE_T len) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, wcstombs, dest, src, len);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(wcstombs)(dest, src, len);
   if (res != (SIZE_T) - 1 && dest) {
     SIZE_T write_cnt = res + (res < len);
@@ -2197,6 +2406,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, wcsrtombs, dest, src, len, ps);
   if (src) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
   if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(wcsrtombs)(dest, src, len, ps);
   if (res != (SIZE_T) - 1 && dest && src) {
     SIZE_T write_cnt = res + !*src;
@@ -2222,6 +2434,9 @@
     if (nms) COMMON_INTERCEPTOR_READ_RANGE(ctx, *src, nms);
   }
   if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(wcsnrtombs)(dest, src, nms, len, ps);
   if (res != (SIZE_T) - 1 && dest && src) {
     SIZE_T write_cnt = res + !*src;
@@ -2239,6 +2454,9 @@
 INTERCEPTOR(int, tcgetattr, int fd, void *termios_p) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, tcgetattr, fd, termios_p);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(tcgetattr)(fd, termios_p);
   if (!res && termios_p)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, termios_p, struct_termios_sz);
@@ -2293,6 +2511,9 @@
 INTERCEPTOR(SIZE_T, confstr, int name, char *buf, SIZE_T len) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, confstr, name, buf, len);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(confstr)(name, buf, len);
   if (buf && res)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res < len ? res : len);
@@ -2307,6 +2528,9 @@
 INTERCEPTOR(int, sched_getaffinity, int pid, SIZE_T cpusetsize, void *mask) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sched_getaffinity, pid, cpusetsize, mask);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sched_getaffinity)(pid, cpusetsize, mask);
   if (mask && !res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mask, cpusetsize);
   return res;
@@ -2333,6 +2557,9 @@
 INTERCEPTOR(char *, strerror_r, int errnum, char *buf, SIZE_T buflen) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(strerror_r)(errnum, buf, buflen);
   // There are 2 versions of strerror_r:
   //  * POSIX version returns 0 on success, negative error code on failure,
@@ -2361,6 +2588,9 @@
 INTERCEPTOR(int, __xpg_strerror_r, int errnum, char *buf, SIZE_T buflen) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, __xpg_strerror_r, errnum, buf, buflen);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(__xpg_strerror_r)(errnum, buf, buflen);
   // This version always returns a null-terminated string.
   if (buf && buflen)
@@ -2403,6 +2633,9 @@
   if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, REAL(strlen)(dirp) + 1);
   scandir_filter = filter;
   scandir_compar = compar;
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(scandir)(dirp, namelist, filter ? wrapped_scandir_filter : 0,
                           compar ? wrapped_scandir_compar : 0);
   scandir_filter = 0;
@@ -2452,6 +2685,9 @@
   if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, REAL(strlen)(dirp) + 1);
   scandir64_filter = filter;
   scandir64_compar = compar;
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res =
       REAL(scandir64)(dirp, namelist, filter ? wrapped_scandir64_filter : 0,
                       compar ? wrapped_scandir64_compar : 0);
@@ -2475,6 +2711,9 @@
 INTERCEPTOR(int, getgroups, int size, u32 *lst) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getgroups, size, lst);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getgroups)(size, lst);
   if (res && lst) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lst, res * sizeof(*lst));
   return res;
@@ -2538,6 +2777,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, wordexp, s, p, flags);
   if (s) COMMON_INTERCEPTOR_READ_RANGE(ctx, s, REAL(strlen)(s) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(wordexp)(s, p, flags);
   if (!res && p) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
@@ -2561,6 +2803,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigwait, set, sig);
   // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigwait)(set, sig);
   if (!res && sig) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sig, sizeof(*sig));
   return res;
@@ -2575,6 +2820,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigwaitinfo, set, info);
   // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigwaitinfo)(set, info);
   if (res > 0 && info) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, siginfo_t_sz);
   return res;
@@ -2591,6 +2839,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, sigtimedwait, set, info, timeout);
   if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
   // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigtimedwait)(set, info, timeout);
   if (res > 0 && info) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, siginfo_t_sz);
   return res;
@@ -2604,6 +2855,9 @@
 INTERCEPTOR(int, sigemptyset, __sanitizer_sigset_t *set) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigemptyset, set);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigemptyset)(set);
   if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
   return res;
@@ -2612,6 +2866,9 @@
 INTERCEPTOR(int, sigfillset, __sanitizer_sigset_t *set) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigfillset, set);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigfillset)(set);
   if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
   return res;
@@ -2627,6 +2884,9 @@
 INTERCEPTOR(int, sigpending, __sanitizer_sigset_t *set) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigpending, set);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigpending)(set);
   if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
   return res;
@@ -2642,6 +2902,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sigprocmask, how, set, oldset);
   // FIXME: read sigset_t when all of sigemptyset, etc are intercepted
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(sigprocmask)(how, set, oldset);
   if (!res && oldset)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
@@ -2656,6 +2919,9 @@
 INTERCEPTOR(int, backtrace, void **buffer, int size) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, backtrace, buffer, size);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(backtrace)(buffer, size);
   if (res && buffer)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buffer, res * sizeof(*buffer));
@@ -2667,6 +2933,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, backtrace_symbols, buffer, size);
   if (buffer && size)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, size * sizeof(*buffer));
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char **res = REAL(backtrace_symbols)(buffer, size);
   if (res && size) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, size * sizeof(*res));
@@ -2772,6 +3041,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, statfs, path, buf);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(statfs)(path, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs_sz);
   return res;
@@ -2779,6 +3051,9 @@
 INTERCEPTOR(int, fstatfs, int fd, void *buf) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fstatfs, fd, buf);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(fstatfs)(fd, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs_sz);
   return res;
@@ -2795,6 +3070,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, statfs64, path, buf);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(statfs64)(path, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs64_sz);
   return res;
@@ -2802,6 +3080,9 @@
 INTERCEPTOR(int, fstatfs64, int fd, void *buf) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fstatfs64, fd, buf);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(fstatfs64)(fd, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs64_sz);
   return res;
@@ -2818,6 +3099,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, statvfs, path, buf);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(statvfs)(path, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
   return res;
@@ -2825,6 +3109,9 @@
 INTERCEPTOR(int, fstatvfs, int fd, void *buf) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs, fd, buf);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(fstatvfs)(fd, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
   return res;
@@ -2841,6 +3128,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, statvfs64, path, buf);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(statvfs64)(path, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs64_sz);
   return res;
@@ -2848,6 +3138,9 @@
 INTERCEPTOR(int, fstatvfs64, int fd, void *buf) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs64, fd, buf);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(fstatvfs64)(fd, buf);
   if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs64_sz);
   return res;
@@ -2872,7 +3165,7 @@
 #define INIT_INITGROUPS
 #endif
 
-#if SANITIZER_INTERCEPT_ETHER
+#if SANITIZER_INTERCEPT_ETHER_NTOA_ATON
 INTERCEPTOR(char *, ether_ntoa, __sanitizer_ether_addr *addr) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa, addr);
@@ -2889,10 +3182,21 @@
   if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, sizeof(*res));
   return res;
 }
+#define INIT_ETHER_NTOA_ATON             \
+  COMMON_INTERCEPT_FUNCTION(ether_ntoa); \
+  COMMON_INTERCEPT_FUNCTION(ether_aton);
+#else
+#define INIT_ETHER_NTOA_ATON
+#endif
+
+#if SANITIZER_INTERCEPT_ETHER_HOST
 INTERCEPTOR(int, ether_ntohost, char *hostname, __sanitizer_ether_addr *addr) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ether_ntohost, hostname, addr);
   if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(ether_ntohost)(hostname, addr);
   if (!res && hostname)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, REAL(strlen)(hostname) + 1);
@@ -2903,6 +3207,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, ether_hostton, hostname, addr);
   if (hostname)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, hostname, REAL(strlen)(hostname) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(ether_hostton)(hostname, addr);
   if (!res && addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
   return res;
@@ -2912,6 +3219,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ether_line, line, addr, hostname);
   if (line) COMMON_INTERCEPTOR_READ_RANGE(ctx, line, REAL(strlen)(line) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(ether_line)(line, addr, hostname);
   if (!res) {
     if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
@@ -2920,14 +3230,12 @@
   }
   return res;
 }
-#define INIT_ETHER                          \
-  COMMON_INTERCEPT_FUNCTION(ether_ntoa);    \
-  COMMON_INTERCEPT_FUNCTION(ether_aton);    \
+#define INIT_ETHER_HOST                     \
   COMMON_INTERCEPT_FUNCTION(ether_ntohost); \
   COMMON_INTERCEPT_FUNCTION(ether_hostton); \
   COMMON_INTERCEPT_FUNCTION(ether_line);
 #else
-#define INIT_ETHER
+#define INIT_ETHER_HOST
 #endif
 
 #if SANITIZER_INTERCEPT_ETHER_R
@@ -2935,6 +3243,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa_r, addr, buf);
   if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(ether_ntoa_r)(addr, buf);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, REAL(strlen)(res) + 1);
   return res;
@@ -2944,6 +3255,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ether_aton_r, buf, addr);
   if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, REAL(strlen)(buf) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   __sanitizer_ether_addr *res = REAL(ether_aton_r)(buf, addr);
   if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, sizeof(*res));
   return res;
@@ -2959,6 +3273,9 @@
 INTERCEPTOR(int, shmctl, int shmid, int cmd, void *buf) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, shmctl, shmid, cmd, buf);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(shmctl)(shmid, cmd, buf);
   if (res >= 0) {
     unsigned sz = 0;
@@ -2981,6 +3298,9 @@
 INTERCEPTOR(int, random_r, void *buf, u32 *result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, random_r, buf, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(random_r)(buf, result);
   if (!res && result)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
@@ -2991,16 +3311,33 @@
 #define INIT_RANDOM_R
 #endif
 
-#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET || \
-    SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSSCHED
-#define INTERCEPTOR_PTHREAD_ATTR_GET(what, sz)                      \
-  INTERCEPTOR(int, pthread_attr_get##what, void *attr, void *r) {   \
-    void *ctx;                                                      \
-    COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_get##what, attr, r); \
-    int res = REAL(pthread_attr_get##what)(attr, r);                \
-    if (!res && r) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, r, sz);      \
-    return res;                                                     \
+// FIXME: under ASan the REAL() call below may write to freed memory and corrupt
+// its metadata. See
+// https://code.google.com/p/address-sanitizer/issues/detail?id=321.
+#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET ||              \
+    SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSSCHED || \
+    SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GET ||         \
+    SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GET ||        \
+    SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GET ||          \
+    SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GET
+#define INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(fn, sz)            \
+  INTERCEPTOR(int, fn, void *attr, void *r) {                  \
+    void *ctx;                                                 \
+    COMMON_INTERCEPTOR_ENTER(ctx, fn, attr, r);                \
+    int res = REAL(fn)(attr, r);                               \
+    if (!res && r) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, r, sz); \
+    return res;                                                \
   }
+#define INTERCEPTOR_PTHREAD_ATTR_GET(what, sz) \
+  INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_attr_get##what, sz)
+#define INTERCEPTOR_PTHREAD_MUTEXATTR_GET(what, sz) \
+  INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_mutexattr_get##what, sz)
+#define INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(what, sz) \
+  INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_rwlockattr_get##what, sz)
+#define INTERCEPTOR_PTHREAD_CONDATTR_GET(what, sz) \
+  INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_condattr_get##what, sz)
+#define INTERCEPTOR_PTHREAD_BARRIERATTR_GET(what, sz) \
+  INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_barrierattr_get##what, sz)
 #endif
 
 #if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET
@@ -3013,6 +3350,9 @@
 INTERCEPTOR(int, pthread_attr_getstack, void *attr, void **addr, SIZE_T *size) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getstack, attr, addr, size);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(pthread_attr_getstack)(attr, addr, size);
   if (!res) {
     if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
@@ -3059,6 +3399,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getaffinity_np, attr, cpusetsize,
                            cpuset);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(pthread_attr_getaffinity_np)(attr, cpusetsize, cpuset);
   if (!res && cpusetsize && cpuset)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cpuset, cpusetsize);
@@ -3071,6 +3414,94 @@
 #define INIT_PTHREAD_ATTR_GETAFFINITY_NP
 #endif
 
+#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED
+INTERCEPTOR_PTHREAD_MUTEXATTR_GET(pshared, sizeof(int))
+#define INIT_PTHREAD_MUTEXATTR_GETPSHARED \
+  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getpshared);
+#else
+#define INIT_PTHREAD_MUTEXATTR_GETPSHARED
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE
+INTERCEPTOR_PTHREAD_MUTEXATTR_GET(type, sizeof(int))
+#define INIT_PTHREAD_MUTEXATTR_GETTYPE \
+  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_gettype);
+#else
+#define INIT_PTHREAD_MUTEXATTR_GETTYPE
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL
+INTERCEPTOR_PTHREAD_MUTEXATTR_GET(protocol, sizeof(int))
+#define INIT_PTHREAD_MUTEXATTR_GETPROTOCOL \
+  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprotocol);
+#else
+#define INIT_PTHREAD_MUTEXATTR_GETPROTOCOL
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING
+INTERCEPTOR_PTHREAD_MUTEXATTR_GET(prioceiling, sizeof(int))
+#define INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING \
+  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprioceiling);
+#else
+#define INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST
+INTERCEPTOR_PTHREAD_MUTEXATTR_GET(robust, sizeof(int))
+#define INIT_PTHREAD_MUTEXATTR_GETROBUST \
+  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust);
+#else
+#define INIT_PTHREAD_MUTEXATTR_GETROBUST
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP
+INTERCEPTOR_PTHREAD_MUTEXATTR_GET(robust_np, sizeof(int))
+#define INIT_PTHREAD_MUTEXATTR_GETROBUST_NP \
+  COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust_np);
+#else
+#define INIT_PTHREAD_MUTEXATTR_GETROBUST_NP
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED
+INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(pshared, sizeof(int))
+#define INIT_PTHREAD_RWLOCKATTR_GETPSHARED \
+  COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getpshared);
+#else
+#define INIT_PTHREAD_RWLOCKATTR_GETPSHARED
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETKIND_NP
+INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(kind_np, sizeof(int))
+#define INIT_PTHREAD_RWLOCKATTR_GETKIND_NP \
+  COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getkind_np);
+#else
+#define INIT_PTHREAD_RWLOCKATTR_GETKIND_NP
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETPSHARED
+INTERCEPTOR_PTHREAD_CONDATTR_GET(pshared, sizeof(int))
+#define INIT_PTHREAD_CONDATTR_GETPSHARED \
+  COMMON_INTERCEPT_FUNCTION(pthread_condattr_getpshared);
+#else
+#define INIT_PTHREAD_CONDATTR_GETPSHARED
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETCLOCK
+INTERCEPTOR_PTHREAD_CONDATTR_GET(clock, sizeof(int))
+#define INIT_PTHREAD_CONDATTR_GETCLOCK \
+  COMMON_INTERCEPT_FUNCTION(pthread_condattr_getclock);
+#else
+#define INIT_PTHREAD_CONDATTR_GETCLOCK
+#endif
+
+#if SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GETPSHARED
+INTERCEPTOR_PTHREAD_BARRIERATTR_GET(pshared, sizeof(int)) // !mac !android
+#define INIT_PTHREAD_BARRIERATTR_GETPSHARED \
+  COMMON_INTERCEPT_FUNCTION(pthread_barrierattr_getpshared);
+#else
+#define INIT_PTHREAD_BARRIERATTR_GETPSHARED
+#endif
+
 #if SANITIZER_INTERCEPT_TMPNAM
 INTERCEPTOR(char *, tmpnam, char *s) {
   void *ctx;
@@ -3078,6 +3509,9 @@
   char *res = REAL(tmpnam)(s);
   if (res) {
     if (s)
+      // FIXME: under ASan the REAL(tmpnam) call above may write to freed
+      // memory and corrupt its metadata. See
+      // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, REAL(strlen)(s) + 1);
     else
       COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, REAL(strlen)(res) + 1);
@@ -3093,6 +3527,9 @@
 INTERCEPTOR(char *, tmpnam_r, char *s) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, tmpnam_r, s);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(tmpnam_r)(s);
   if (res && s) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, REAL(strlen)(s) + 1);
   return res;
@@ -3133,6 +3570,9 @@
 INTERCEPTOR(void, sincos, double x, double *sin, double *cos) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sincos, x, sin, cos);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   REAL(sincos)(x, sin, cos);
   if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
   if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
@@ -3140,6 +3580,9 @@
 INTERCEPTOR(void, sincosf, float x, float *sin, float *cos) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sincosf, x, sin, cos);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   REAL(sincosf)(x, sin, cos);
   if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
   if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
@@ -3147,6 +3590,9 @@
 INTERCEPTOR(void, sincosl, long double x, long double *sin, long double *cos) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, sincosl, x, sin, cos);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   REAL(sincosl)(x, sin, cos);
   if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
   if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
@@ -3163,6 +3609,9 @@
 INTERCEPTOR(double, remquo, double x, double y, int *quo) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, remquo, x, y, quo);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   double res = REAL(remquo)(x, y, quo);
   if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
   return res;
@@ -3170,6 +3619,9 @@
 INTERCEPTOR(float, remquof, float x, float y, int *quo) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, remquof, x, y, quo);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   float res = REAL(remquof)(x, y, quo);
   if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
   return res;
@@ -3177,6 +3629,9 @@
 INTERCEPTOR(long double, remquol, long double x, long double y, int *quo) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, remquol, x, y, quo);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   long double res = REAL(remquol)(x, y, quo);
   if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
   return res;
@@ -3224,6 +3679,9 @@
 INTERCEPTOR(double, lgamma_r, double x, int *signp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, lgamma_r, x, signp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   double res = REAL(lgamma_r)(x, signp);
   if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
   return res;
@@ -3231,29 +3689,43 @@
 INTERCEPTOR(float, lgammaf_r, float x, int *signp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, lgammaf_r, x, signp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   float res = REAL(lgammaf_r)(x, signp);
   if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
   return res;
 }
-INTERCEPTOR(long double, lgammal_r, long double x, int *signp) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, lgammal_r, x, signp);
-  long double res = REAL(lgammal_r)(x, signp);
-  if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
-  return res;
-}
 #define INIT_LGAMMA_R                   \
   COMMON_INTERCEPT_FUNCTION(lgamma_r);  \
-  COMMON_INTERCEPT_FUNCTION(lgammaf_r); \
-  COMMON_INTERCEPT_FUNCTION(lgammal_r);
+  COMMON_INTERCEPT_FUNCTION(lgammaf_r);
 #else
 #define INIT_LGAMMA_R
 #endif
 
+#if SANITIZER_INTERCEPT_LGAMMAL_R
+INTERCEPTOR(long double, lgammal_r, long double x, int *signp) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, lgammal_r, x, signp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
+  long double res = REAL(lgammal_r)(x, signp);
+  if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
+  return res;
+}
+#define INIT_LGAMMAL_R COMMON_INTERCEPT_FUNCTION(lgammal_r);
+#else
+#define INIT_LGAMMAL_R
+#endif
+
 #if SANITIZER_INTERCEPT_DRAND48_R
 INTERCEPTOR(int, drand48_r, void *buffer, double *result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, drand48_r, buffer, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(drand48_r)(buffer, result);
   if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
   return res;
@@ -3261,6 +3733,9 @@
 INTERCEPTOR(int, lrand48_r, void *buffer, long *result) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, lrand48_r, buffer, result);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(lrand48_r)(buffer, result);
   if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
   return res;
@@ -3288,6 +3763,9 @@
 INTERCEPTOR(SSIZE_T, getline, char **lineptr, SIZE_T *n, void *stream) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getline, lineptr, n, stream);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(getline)(lineptr, n, stream);
   if (res > 0) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr));
@@ -3296,20 +3774,28 @@
   }
   return res;
 }
+INTERCEPTOR(SSIZE_T, __getdelim, char **lineptr, SIZE_T *n, int delim,
+            void *stream) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, __getdelim, lineptr, n, delim, stream);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
+  SSIZE_T res = REAL(__getdelim)(lineptr, n, delim, stream);
+  if (res > 0) {
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *lineptr, res + 1);
+  }
+  return res;
+}
 INTERCEPTOR(SSIZE_T, getdelim, char **lineptr, SIZE_T *n, int delim,
             void *stream) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, getdelim, lineptr, n, delim, stream);
-  SSIZE_T res = REAL(getdelim)(lineptr, n, delim, stream);
-  if (res > 0) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr));
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *lineptr, res + 1);
-  }
-  return res;
+  return __getdelim(lineptr, n, delim, stream);
 }
-#define INIT_GETLINE                  \
-  COMMON_INTERCEPT_FUNCTION(getline); \
+#define INIT_GETLINE                     \
+  COMMON_INTERCEPT_FUNCTION(getline);    \
+  COMMON_INTERCEPT_FUNCTION(__getdelim); \
   COMMON_INTERCEPT_FUNCTION(getdelim);
 #else
 #define INIT_GETLINE
@@ -3328,6 +3814,9 @@
   if (outbytesleft)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, outbytesleft, sizeof(*outbytesleft));
   void *outbuf_orig = outbuf ? *outbuf : 0;
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SIZE_T res = REAL(iconv)(cd, inbuf, inbytesleft, outbuf, outbytesleft);
   if (res != (SIZE_T) - 1 && outbuf && *outbuf > outbuf_orig) {
     SIZE_T sz = (char *)*outbuf - (char *)outbuf_orig;
@@ -3344,6 +3833,9 @@
 INTERCEPTOR(__sanitizer_clock_t, times, void *tms) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, times, tms);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   __sanitizer_clock_t res = REAL(times)(tms);
   if (res != (__sanitizer_clock_t)-1 && tms)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tms, struct_tms_sz);
@@ -3360,7 +3852,11 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr, arg);
   void *res = REAL(__tls_get_addr)(arg);
-  DTLS_on_tls_get_addr(arg, res);
+  DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, res);
+  if (dtv) {
+    // New DTLS block has been allocated.
+    COMMON_INTERCEPTOR_INITIALIZE_RANGE((void *)dtv->beg, dtv->size);
+  }
   return res;
 }
 #else
@@ -3372,6 +3868,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, listxattr, path, list, size);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(listxattr)(path, list, size);
   // Here and below, size == 0 is a special case where nothing is written to the
   // buffer, and res contains the desired buffer size.
@@ -3382,6 +3881,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, llistxattr, path, list, size);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(llistxattr)(path, list, size);
   if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
   return res;
@@ -3389,6 +3891,9 @@
 INTERCEPTOR(SSIZE_T, flistxattr, int fd, char *list, SIZE_T size) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, flistxattr, fd, list, size);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(flistxattr)(fd, list, size);
   if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
   return res;
@@ -3408,6 +3913,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, getxattr, path, name, value, size);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
   if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(getxattr)(path, name, value, size);
   if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
   return res;
@@ -3418,6 +3926,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, lgetxattr, path, name, value, size);
   if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, REAL(strlen)(path) + 1);
   if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(lgetxattr)(path, name, value, size);
   if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
   return res;
@@ -3427,6 +3938,9 @@
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fgetxattr, fd, name, value, size);
   if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, REAL(strlen)(name) + 1);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   SSIZE_T res = REAL(fgetxattr)(fd, name, value, size);
   if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
   return res;
@@ -3443,6 +3957,9 @@
 INTERCEPTOR(int, getresuid, void *ruid, void *euid, void *suid) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getresuid, ruid, euid, suid);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getresuid)(ruid, euid, suid);
   if (res >= 0) {
     if (ruid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ruid, uid_t_sz);
@@ -3454,6 +3971,9 @@
 INTERCEPTOR(int, getresgid, void *rgid, void *egid, void *sgid) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getresgid, rgid, egid, sgid);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getresgid)(rgid, egid, sgid);
   if (res >= 0) {
     if (rgid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rgid, gid_t_sz);
@@ -3476,6 +3996,9 @@
 INTERCEPTOR(int, getifaddrs, __sanitizer_ifaddrs **ifap) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, getifaddrs, ifap);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(getifaddrs)(ifap);
   if (res == 0 && ifap) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifap, sizeof(void *));
@@ -3509,6 +4032,9 @@
 INTERCEPTOR(char *, if_indextoname, unsigned int ifindex, char* ifname) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, if_indextoname, ifindex, ifname);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   char *res = REAL(if_indextoname)(ifindex, ifname);
   if (res && ifname)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifname, REAL(strlen)(ifname) + 1);
@@ -3534,6 +4060,9 @@
   COMMON_INTERCEPTOR_ENTER(ctx, capget, hdrp, datap);
   if (hdrp)
     COMMON_INTERCEPTOR_READ_RANGE(ctx, hdrp, __user_cap_header_struct_sz);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(capget)(hdrp, datap);
   if (res == 0 && datap)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datap, __user_cap_data_struct_sz);
@@ -3632,6 +4161,9 @@
 INTERCEPTOR(int, ftime, __sanitizer_timeb *tp) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, ftime, tp);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(ftime)(tp);
   if (tp)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, sizeof(*tp));
@@ -3647,6 +4179,9 @@
             unsigned size, int op) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, xdrmem_create, xdrs, addr, size, op);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   REAL(xdrmem_create)(xdrs, addr, size, op);
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdrs, sizeof(*xdrs));
   if (op == __sanitizer_XDR_ENCODE) {
@@ -3659,10 +4194,16 @@
 INTERCEPTOR(void, xdrstdio_create, __sanitizer_XDR *xdrs, void *file, int op) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, xdrstdio_create, xdrs, file, op);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   REAL(xdrstdio_create)(xdrs, file, op);
   COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdrs, sizeof(*xdrs));
 }
 
+// FIXME: under ASan the call below may write to freed memory and corrupt
+// its metadata. See
+// https://code.google.com/p/address-sanitizer/issues/detail?id=321.
 #define XDR_INTERCEPTOR(F, T)                             \
   INTERCEPTOR(int, F, __sanitizer_XDR *xdrs, T *p) {      \
     void *ctx;                                            \
@@ -3714,6 +4255,9 @@
     COMMON_INTERCEPTOR_READ_RANGE(ctx, sizep, sizeof(*sizep));
     COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, *sizep);
   }
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(xdr_bytes)(xdrs, p, sizep, maxsize);
   if (p && sizep && xdrs->x_op == __sanitizer_XDR_DECODE) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
@@ -3731,6 +4275,9 @@
     COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
     COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, REAL(strlen)(*p) + 1);
   }
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   int res = REAL(xdr_string)(xdrs, p, maxsize);
   if (p && xdrs->x_op == __sanitizer_XDR_DECODE) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
@@ -3780,6 +4327,9 @@
             int (*compar)(const void *, const void *)) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, tsearch, key, rootp, compar);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   void *res = REAL(tsearch)(key, rootp, compar);
   if (res && *(void **)res == key)
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, sizeof(void *));
@@ -3930,6 +4480,9 @@
 INTERCEPTOR(__sanitizer_FILE *, open_memstream, char **ptr, SIZE_T *sizeloc) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, open_memstream, ptr, sizeloc);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   __sanitizer_FILE *res = REAL(open_memstream)(ptr, sizeloc);
   if (res) {
     COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, sizeof(*ptr));
@@ -3958,6 +4511,9 @@
             const char *mode) {
   void *ctx;
   COMMON_INTERCEPTOR_ENTER(ctx, fmemopen, buf, size, mode);
+  // FIXME: under ASan the call below may write to freed memory and corrupt
+  // its metadata. See
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321.
   __sanitizer_FILE *res = REAL(fmemopen)(buf, size, mode);
   if (res) unpoison_file(res);
   return res;
@@ -4123,6 +4679,9 @@
   INIT_GETSOCKNAME;
   INIT_GETHOSTBYNAME;
   INIT_GETHOSTBYNAME_R;
+  INIT_GETHOSTBYNAME2_R;
+  INIT_GETHOSTBYADDR_R;
+  INIT_GETHOSTENT_R;
   INIT_GETSOCKOPT;
   INIT_ACCEPT;
   INIT_ACCEPT4;
@@ -4174,13 +4733,25 @@
   INIT_STATVFS;
   INIT_STATVFS64;
   INIT_INITGROUPS;
-  INIT_ETHER;
+  INIT_ETHER_NTOA_ATON;
+  INIT_ETHER_HOST;
   INIT_ETHER_R;
   INIT_SHMCTL;
   INIT_RANDOM_R;
   INIT_PTHREAD_ATTR_GET;
   INIT_PTHREAD_ATTR_GETINHERITSCHED;
   INIT_PTHREAD_ATTR_GETAFFINITY_NP;
+  INIT_PTHREAD_MUTEXATTR_GETPSHARED;
+  INIT_PTHREAD_MUTEXATTR_GETTYPE;
+  INIT_PTHREAD_MUTEXATTR_GETPROTOCOL;
+  INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING;
+  INIT_PTHREAD_MUTEXATTR_GETROBUST;
+  INIT_PTHREAD_MUTEXATTR_GETROBUST_NP;
+  INIT_PTHREAD_RWLOCKATTR_GETPSHARED;
+  INIT_PTHREAD_RWLOCKATTR_GETKIND_NP;
+  INIT_PTHREAD_CONDATTR_GETPSHARED;
+  INIT_PTHREAD_CONDATTR_GETCLOCK;
+  INIT_PTHREAD_BARRIERATTR_GETPSHARED;
   INIT_TMPNAM;
   INIT_TMPNAM_R;
   INIT_TEMPNAM;
@@ -4189,6 +4760,7 @@
   INIT_REMQUO;
   INIT_LGAMMA;
   INIT_LGAMMA_R;
+  INIT_LGAMMAL_R;
   INIT_DRAND48_R;
   INIT_RAND_R;
   INIT_GETLINE;
diff --git a/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
index 31b65ca..69b7ca9 100755
--- a/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
+++ b/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc
@@ -529,7 +529,7 @@
   desc->name = "<DECODED_IOCTL>";
   desc->size = IOC_SIZE(req);
   // Sanity check.
-  if (desc->size > 1024) return false;
+  if (desc->size > 0xFFFF) return false;
   unsigned dir = IOC_DIR(req);
   switch (dir) {
     case IOC_NONE:
@@ -547,10 +547,10 @@
     default:
       return false;
   }
-  if (desc->type != IOC_NONE && desc->size == 0) return false;
-  char id = IOC_TYPE(req);
+  // Size can be 0 iff type is NONE.
+  if ((desc->type == IOC_NONE) != (desc->size == 0)) return false;
   // Sanity check.
-  if (!(id >= 'a' && id <= 'z') && !(id >= 'A' && id <= 'Z')) return false;
+  if (IOC_TYPE(req) == 0) return false;
   return true;
 }
 
diff --git a/lib/sanitizer_common/sanitizer_common_syscalls.inc b/lib/sanitizer_common/sanitizer_common_syscalls.inc
index 4bae308..23da703 100644
--- a/lib/sanitizer_common/sanitizer_common_syscalls.inc
+++ b/lib/sanitizer_common/sanitizer_common_syscalls.inc
@@ -831,6 +831,7 @@
   }
 }
 
+#if !SANITIZER_ANDROID
 PRE_SYSCALL(statfs)(const void *path, void *buf) {
   if (path)
     PRE_READ(path, __sanitizer::internal_strlen((const char *)path) + 1);
@@ -868,6 +869,7 @@
     if (buf) POST_WRITE(buf, struct_statfs64_sz);
   }
 }
+#endif // !SANITIZER_ANDROID
 
 PRE_SYSCALL(lstat)(const void *filename, void *statbuf) {
   if (filename)
@@ -2295,7 +2297,7 @@
 POST_SYSCALL(ni_syscall)(long res) {}
 
 PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) {
-#if defined(__i386) || defined (__x86_64)
+#if !SANITIZER_ANDROID && (defined(__i386) || defined (__x86_64))
   if (data) {
     if (request == ptrace_setregs) {
       PRE_READ((void *)data, struct_user_regs_struct_sz);
@@ -2314,7 +2316,7 @@
 }
 
 POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) {
-#if defined(__i386) || defined (__x86_64)
+#if !SANITIZER_ANDROID && (defined(__i386) || defined (__x86_64))
   if (res >= 0 && data) {
     // Note that this is different from the interceptor in
     // sanitizer_common_interceptors.inc.
diff --git a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
index 64861d0..c22de97 100644
--- a/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_libcdep.cc
@@ -59,6 +59,8 @@
 class CoverageData {
  public:
   void Init();
+  void BeforeFork();
+  void AfterFork(int child_pid);
   void Extend(uptr npcs);
   void Add(uptr pc);
 
@@ -85,45 +87,71 @@
   int pc_fd;
   StaticSpinMutex mu;
 
-  void DirectInit();
+  void DirectOpen();
+  void ReInit();
 };
 
 static CoverageData coverage_data;
 
-void CoverageData::DirectInit() {
-  InternalScopedString path(64);
-  internal_snprintf((char *)path.data(), path.size(), "%zd.sancov.raw",
-                    internal_getpid());
+void CoverageData::DirectOpen() {
+  InternalScopedString path(1024);
+  internal_snprintf((char *)path.data(), path.size(), "%s/%zd.sancov.raw",
+                    common_flags()->coverage_dir, internal_getpid());
   pc_fd = OpenFile(path.data(), true);
   if (internal_iserror(pc_fd)) {
     Report(" Coverage: failed to open %s for writing\n", path.data());
     Die();
   }
 
-  atomic_store(&pc_array_size, 0, memory_order_relaxed);
   pc_array_mapped_size = 0;
-
   CovUpdateMapping();
 }
 
 void CoverageData::Init() {
   pc_array = reinterpret_cast<uptr *>(
       MmapNoReserveOrDie(sizeof(uptr) * kPcArrayMaxSize, "CovInit"));
+  pc_fd = kInvalidFd;
   if (common_flags()->coverage_direct) {
-    DirectInit();
+    atomic_store(&pc_array_size, 0, memory_order_relaxed);
+    atomic_store(&pc_array_index, 0, memory_order_relaxed);
   } else {
-    pc_fd = 0;
     atomic_store(&pc_array_size, kPcArrayMaxSize, memory_order_relaxed);
+    atomic_store(&pc_array_index, 0, memory_order_relaxed);
   }
 }
 
+void CoverageData::ReInit() {
+  internal_munmap(pc_array, sizeof(uptr) * kPcArrayMaxSize);
+  if (pc_fd != kInvalidFd) internal_close(pc_fd);
+  if (common_flags()->coverage_direct) {
+    // pc_array_size is kept in bytes while Extend() takes a number of PCs,
+    // so convert before re-extending the new file to the known array size.
+    uptr size = atomic_load(&pc_array_size, memory_order_relaxed);
+    Init();
+    if (size) Extend(size / sizeof(uptr));
+  } else {
+    Init();
+  }
+}
+
+void CoverageData::BeforeFork() {
+  mu.Lock();
+}
+
+void CoverageData::AfterFork(int child_pid) {
+  // We are single-threaded so it's OK to release the lock early.
+  mu.Unlock();
+  if (child_pid == 0) ReInit();
+}
+
 // Extend coverage PC array to fit additional npcs elements.
 void CoverageData::Extend(uptr npcs) {
-  // If pc_fd=0, pc array is a huge anonymous mapping that does not need to be
-  // resized.
-  if (!pc_fd) return;
+  if (!common_flags()->coverage_direct) return;
   SpinMutexLock l(&mu);
 
+  if (pc_fd == kInvalidFd) DirectOpen();
+  CHECK_NE(pc_fd, kInvalidFd);
+
   uptr size = atomic_load(&pc_array_size, memory_order_relaxed);
   size += npcs * sizeof(uptr);
 
@@ -221,15 +249,17 @@
   InternalScopedBuffer<char> path(1024);
   if (!packed) {
     CHECK(name);
-    internal_snprintf((char *)path.data(), path.size(), "%s.%zd.sancov",
-                      name, internal_getpid());
+    // Unpacked dump goes to <coverage_dir>/<name>.<pid>.sancov.
+    internal_snprintf((char *)path.data(), path.size(), "%s/%s.%zd.sancov",
+                      common_flags()->coverage_dir, name, internal_getpid());
   } else {
     if (!name)
-      internal_snprintf((char *)path.data(), path.size(), "%zd.sancov.packed",
+      internal_snprintf((char *)path.data(), path.size(),
+                        "%s/%zd.sancov.packed", common_flags()->coverage_dir,
                         internal_getpid());
     else
-      internal_snprintf((char *)path.data(), path.size(), "%s.sancov.packed",
-                        name);
+      internal_snprintf((char *)path.data(), path.size(), "%s/%s.sancov.packed",
+                        common_flags()->coverage_dir, name);
   }
   uptr fd = OpenFile(path.data(), true);
   if (internal_iserror(fd)) {
@@ -279,8 +309,9 @@
         }
       } else {
         // One file per module per process.
-        internal_snprintf((char *)path.data(), path.size(), "%s.%zd.sancov",
-                          module_name, internal_getpid());
+        internal_snprintf((char *)path.data(), path.size(), "%s/%s.%zd.sancov",
+                          common_flags()->coverage_dir, module_name,
+                          internal_getpid());
         int fd = CovOpenFile(false /* packed */, module_name);
         if (fd > 0) {
           internal_write(fd, offsets.data(), offsets.size() * sizeof(u32));
@@ -314,6 +345,15 @@
   if (!common_flags()->coverage) return -1;
   return CovOpenFile(true /* packed */, name);
 }
+
+void CovBeforeFork() {
+  coverage_data.BeforeFork();
+}
+
+void CovAfterFork(int child_pid) {
+  coverage_data.AfterFork(child_pid);
+}
+
 }  // namespace __sanitizer
 
 extern "C" {
@@ -325,6 +365,12 @@
   coverage_data.Init();
 }
 SANITIZER_INTERFACE_ATTRIBUTE void __sanitizer_cov_module_init(uptr npcs) {
+  if (!common_flags()->coverage || !common_flags()->coverage_direct) return;
+  if (SANITIZER_ANDROID) {
+    // dlopen/dlclose interceptors do not work on Android, so we rely on
+    // Extend() calls to update .sancov.map.
+    CovUpdateMapping(GET_CALLER_PC());
+  }
   coverage_data.Extend(npcs);
 }
 SANITIZER_INTERFACE_ATTRIBUTE
diff --git a/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc b/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
index 75f6162..e4ee875 100644
--- a/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_coverage_mapping_libcdep.cc
@@ -36,20 +36,41 @@
 namespace __sanitizer {
 
 static const uptr kMaxNumberOfModules = 1 << 14;
+static const uptr kMaxTextSize = 64 * 1024;
 
-void CovUpdateMapping() {
-  if (!common_flags()->coverage || !common_flags()->coverage_direct) return;
-
-  int err;
-  InternalScopedString tmp_path(64);
-  internal_snprintf((char *)tmp_path.data(), tmp_path.size(),
-                    "%zd.sancov.map.tmp", internal_getpid());
-  uptr map_fd = OpenFile(tmp_path.data(), true);
-  if (internal_iserror(map_fd)) {
-    Report(" Coverage: failed to open %s for writing\n", tmp_path.data());
-    Die();
+struct CachedMapping {
+ public:
+  bool NeedsUpdate(uptr pc) {
+    int new_pid = internal_getpid();
+    if (last_pid == new_pid && pc && pc >= last_range_start &&
+        pc < last_range_end)
+      return false;
+    last_pid = new_pid;
+    return true;
   }
 
+  void SetModuleRange(uptr start, uptr end) {
+    last_range_start = start;
+    last_range_end = end;
+  }
+
+ private:
+  uptr last_range_start, last_range_end;
+  int last_pid;
+};
+
+static CachedMapping cached_mapping;
+static StaticSpinMutex mapping_mu;
+
+void CovUpdateMapping(uptr caller_pc) {
+  if (!common_flags()->coverage || !common_flags()->coverage_direct) return;
+
+  SpinMutexLock l(&mapping_mu);
+
+  if (!cached_mapping.NeedsUpdate(caller_pc))
+    return;
+
+  InternalScopedString text(kMaxTextSize);
   InternalScopedBuffer<char> modules_data(kMaxNumberOfModules *
                                           sizeof(LoadedModule));
   LoadedModule *modules = (LoadedModule *)modules_data.data();
@@ -57,36 +78,46 @@
   int n_modules = GetListOfModules(modules, kMaxNumberOfModules,
                                    /* filter */ 0);
 
-  InternalScopedString line(4096);
-  line.append("%d\n", sizeof(uptr) * 8);
-  uptr res = internal_write(map_fd, line.data(), line.length());
-  if (internal_iserror(res, &err)) {
-    Printf("sancov.map write failed: %d\n", err);
-    Die();
-  }
-  line.clear();
-
+  text.append("%d\n", sizeof(uptr) * 8);
   for (int i = 0; i < n_modules; ++i) {
     char *module_name = StripModuleName(modules[i].full_name());
     for (unsigned j = 0; j < modules[i].n_ranges(); ++j) {
-      line.append("%zx %zx %zx %s\n", modules[i].address_range_start(j),
-                  modules[i].address_range_end(j), modules[i].base_address(),
-                  module_name);
-      res = internal_write(map_fd, line.data(), line.length());
-      if (internal_iserror(res, &err)) {
-        Printf("sancov.map write failed: %d\n", err);
-        Die();
+      if (modules[i].address_range_executable(j)) {
+        uptr start = modules[i].address_range_start(j);
+        uptr end = modules[i].address_range_end(j);
+        uptr base = modules[i].base_address();
+        text.append("%zx %zx %zx %s\n", start, end, base, module_name);
+        if (caller_pc && caller_pc >= start && caller_pc < end)
+          cached_mapping.SetModuleRange(start, end);
       }
-      line.clear();
     }
     InternalFree(module_name);
   }
 
+  int err;
+  InternalScopedString tmp_path(64 +
+                                internal_strlen(common_flags()->coverage_dir));
+  uptr res = internal_snprintf((char *)tmp_path.data(), tmp_path.size(),
+                    "%s/%zd.sancov.map.tmp", common_flags()->coverage_dir,
+                    internal_getpid());
+  CHECK_LT(res, tmp_path.size());  // res == size would mean truncation.
+  uptr map_fd = OpenFile(tmp_path.data(), true);
+  if (internal_iserror(map_fd)) {
+    Report(" Coverage: failed to open %s for writing\n", tmp_path.data());
+    Die();
+  }
+
+  res = internal_write(map_fd, text.data(), text.length());
+  if (internal_iserror(res, &err)) {
+    Printf("sancov.map write failed: %d\n", err);
+    Die();
+  }
   internal_close(map_fd);
 
-  InternalScopedString path(64);
-  internal_snprintf((char *)path.data(), path.size(), "%zd.sancov.map",
-                    internal_getpid());
+  InternalScopedString path(64 + internal_strlen(common_flags()->coverage_dir));
+  res = internal_snprintf((char *)path.data(), path.size(), "%s/%zd.sancov.map",
+                    common_flags()->coverage_dir, internal_getpid());
+  CHECK_LT(res, path.size());  // res == size would mean truncation.
   res = internal_rename(tmp_path.data(), path.data());
   if (internal_iserror(res, &err)) {
     Printf("sancov.map rename failed: %d\n", err);
diff --git a/lib/sanitizer_common/sanitizer_deadlock_detector2.cc b/lib/sanitizer_common/sanitizer_deadlock_detector2.cc
index 2284362..87d4a4d 100644
--- a/lib/sanitizer_common/sanitizer_deadlock_detector2.cc
+++ b/lib/sanitizer_common/sanitizer_deadlock_detector2.cc
@@ -187,8 +187,7 @@
     id = id_gen++;
   }
   CHECK_LE(id, kMaxMutex);
-  VPrintf(3, "#%llu: DD::allocateId assign id %d\n",
-      cb->lt->ctx, id);
+  VPrintf(3, "#%llu: DD::allocateId assign id %d\n", cb->lt->ctx, id);
   return id;
 }
 
diff --git a/lib/sanitizer_common/sanitizer_flags.cc b/lib/sanitizer_common/sanitizer_flags.cc
index 406bb64..0a70e16 100644
--- a/lib/sanitizer_common/sanitizer_flags.cc
+++ b/lib/sanitizer_common/sanitizer_flags.cc
@@ -29,6 +29,11 @@
 
 IntrusiveList<FlagDescription> flag_descriptions;
 
+// If set, the tool will install its own SEGV signal handler by default.
+#ifndef SANITIZER_NEEDS_SEGV
+# define SANITIZER_NEEDS_SEGV 1
+#endif
+
 void SetCommonFlagsDefaults(CommonFlags *f) {
   f->symbolize = true;
   f->external_symbolizer_path = 0;
@@ -55,7 +60,8 @@
   f->legacy_pthread_cond = false;
   f->intercept_tls_get_addr = false;
   f->coverage = false;
-  f->coverage_direct = false;
+  f->coverage_direct = SANITIZER_ANDROID;
+  f->coverage_dir = ".";
   f->full_address_space = false;
 }
 
@@ -132,6 +138,9 @@
             "If set, coverage information will be dumped directly to a memory "
             "mapped file. This way data is not lost even if the process is "
             "suddenly killed.");
+  ParseFlag(str, &f->coverage_dir, "coverage_dir",
+            "Target directory for coverage dumps. Defaults to the current "
+            "directory.");
   ParseFlag(str, &f->full_address_space, "full_address_space",
             "Sanitize complete address space; "
             "by default kernel area on 32-bit platforms will not be sanitized");
@@ -150,14 +159,17 @@
     pos = internal_strstr(env, name);
     if (pos == 0)
       return false;
-    if (pos != env && ((pos[-1] >= 'a' && pos[-1] <= 'z') || pos[-1] == '_')) {
+    const char *name_end = pos + internal_strlen(name);
+    if ((pos != env &&
+         ((pos[-1] >= 'a' && pos[-1] <= 'z') || pos[-1] == '_')) ||
+        *name_end != '=') {
       // Seems to be middle of another flag name or value.
       env = pos + 1;
       continue;
     }
+    pos = name_end;
     break;
   }
-  pos += internal_strlen(name);
   const char *end;
   if (pos[0] != '=') {
     end = pos;
diff --git a/lib/sanitizer_common/sanitizer_flags.h b/lib/sanitizer_common/sanitizer_flags.h
index 1ad53dc..41dc218 100644
--- a/lib/sanitizer_common/sanitizer_flags.h
+++ b/lib/sanitizer_common/sanitizer_flags.h
@@ -55,6 +55,7 @@
   uptr mmap_limit_mb;
   bool coverage;
   bool coverage_direct;
+  const char *coverage_dir;
   bool full_address_space;
 };
 
diff --git a/lib/sanitizer_common/sanitizer_freebsd.h b/lib/sanitizer_common/sanitizer_freebsd.h
new file mode 100644
index 0000000..52a2a85
--- /dev/null
+++ b/lib/sanitizer_common/sanitizer_freebsd.h
@@ -0,0 +1,82 @@
+//===-- sanitizer_freebsd.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of Sanitizer runtime. It contains FreeBSD-specific
+// definitions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_FREEBSD_H
+#define SANITIZER_FREEBSD_H
+
+#include "sanitizer_internal_defs.h"
+
+// x86-64 FreeBSD 9.2 and older define 'ucontext_t' incorrectly in
+// 32-bit mode.
+#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
+# include <osreldate.h>
+# if __FreeBSD_version <= 902001  // v9.2
+#  include <ucontext.h>
+
+namespace __sanitizer {
+
+typedef __int32_t __xregister_t;
+
+typedef struct __xmcontext {
+  __xregister_t mc_onstack;
+  __xregister_t mc_gs;
+  __xregister_t mc_fs;
+  __xregister_t mc_es;
+  __xregister_t mc_ds;
+  __xregister_t mc_edi;
+  __xregister_t mc_esi;
+  __xregister_t mc_ebp;
+  __xregister_t mc_isp;
+  __xregister_t mc_ebx;
+  __xregister_t mc_edx;
+  __xregister_t mc_ecx;
+  __xregister_t mc_eax;
+  __xregister_t mc_trapno;
+  __xregister_t mc_err;
+  __xregister_t mc_eip;
+  __xregister_t mc_cs;
+  __xregister_t mc_eflags;
+  __xregister_t mc_esp;
+  __xregister_t mc_ss;
+
+  int mc_len;
+  int mc_fpformat;
+  int mc_ownedfp;
+  __xregister_t mc_flags;
+
+  int mc_fpstate[128] __aligned(16);
+  __xregister_t mc_fsbase;
+  __xregister_t mc_gsbase;
+  __xregister_t mc_xfpustate;
+  __xregister_t mc_xfpustate_len;
+
+  int mc_spare2[4];
+} xmcontext_t;
+
+typedef struct __xucontext {
+  sigset_t  uc_sigmask;
+  xmcontext_t  uc_mcontext;
+
+  struct __ucontext *uc_link;
+  stack_t uc_stack;
+  int uc_flags;
+  int __spare__[4];
+} xucontext_t;
+
+}  // namespace __sanitizer
+
+# endif  // __FreeBSD_version <= 902001
+#endif  // SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
+
+#endif  // SANITIZER_FREEBSD_H
diff --git a/lib/sanitizer_common/sanitizer_internal_defs.h b/lib/sanitizer_common/sanitizer_internal_defs.h
index 9db5f8f..c8985b4 100644
--- a/lib/sanitizer_common/sanitizer_internal_defs.h
+++ b/lib/sanitizer_common/sanitizer_internal_defs.h
@@ -34,11 +34,6 @@
 # define SANITIZER_SUPPORTS_WEAK_HOOKS 0
 #endif
 
-// If set, the tool will install its own SEGV signal handler.
-#ifndef SANITIZER_NEEDS_SEGV
-# define SANITIZER_NEEDS_SEGV 1
-#endif
-
 // GCC does not understand __has_feature
 #if !defined(__has_feature)
 # define __has_feature(x) 0
diff --git a/lib/sanitizer_common/sanitizer_linux.cc b/lib/sanitizer_common/sanitizer_linux.cc
index f27f22e..e661fcf 100644
--- a/lib/sanitizer_common/sanitizer_linux.cc
+++ b/lib/sanitizer_common/sanitizer_linux.cc
@@ -46,15 +46,16 @@
 #include <sys/time.h>
 #include <sys/types.h>
 #include <unistd.h>
-#include <unwind.h>
 
 #if SANITIZER_FREEBSD
+#include <sys/sysctl.h>
 #include <machine/atomic.h>
 extern "C" {
 // <sys/umtx.h> must be included after <errno.h> and <sys/types.h> on
 // FreeBSD 9.2 and 10.0.
 #include <sys/umtx.h>
 }
+extern char **environ;  // provided by crt1
 #endif  // SANITIZER_FREEBSD
 
 #if !SANITIZER_ANDROID
@@ -314,9 +315,20 @@
   return (u64)tv.tv_sec * 1000*1000*1000 + tv.tv_usec * 1000;
 }
 
-// Like getenv, but reads env directly from /proc and does not use libc.
-// This function should be called first inside __asan_init.
+// Like getenv, but reads env directly from /proc (on Linux) or parses the
+// 'environ' array (on FreeBSD) and does not use libc. This function should be
+// called first inside __asan_init.
 const char *GetEnv(const char *name) {
+#if SANITIZER_FREEBSD
+  if (::environ != 0) {
+    uptr NameLen = internal_strlen(name);
+    for (char **Env = ::environ; *Env != 0; Env++) {
+      if (internal_strncmp(*Env, name, NameLen) == 0 && (*Env)[NameLen] == '=')
+        return (*Env) + NameLen + 1;
+    }
+  }
+  return 0;  // Not found.
+#elif SANITIZER_LINUX
   static char *environ;
   static uptr len;
   static bool inited;
@@ -340,6 +352,9 @@
     p = endp + 1;
   }
   return 0;  // Not found.
+#else
+#error "Unsupported platform"
+#endif
 }
 
 extern "C" {
@@ -667,24 +682,32 @@
 static uptr proc_self_exe_cache_len = 0;
 
 uptr ReadBinaryName(/*out*/char *buf, uptr buf_len) {
+  if (proc_self_exe_cache_len > 0) {
+    // If available, use the cached module name.
+    uptr module_name_len =
+        internal_snprintf(buf, buf_len, "%s", proc_self_exe_cache_str);
+    CHECK_LT(module_name_len, buf_len);
+    return module_name_len;
+  }
+#if SANITIZER_FREEBSD
+  const int Mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+  size_t Size = buf_len;
+  bool IsErr = (sysctl(Mib, 4, buf, &Size, NULL, 0) != 0);
+  int readlink_error = IsErr ? errno : 0;
+  uptr module_name_len = Size;
+#else
   uptr module_name_len = internal_readlink(
       "/proc/self/exe", buf, buf_len);
   int readlink_error;
-  if (internal_iserror(module_name_len, &readlink_error)) {
-    if (proc_self_exe_cache_len) {
-      // If available, use the cached module name.
-      CHECK_LE(proc_self_exe_cache_len, buf_len);
-      internal_strncpy(buf, proc_self_exe_cache_str, buf_len);
-      module_name_len = internal_strlen(proc_self_exe_cache_str);
-    } else {
-      // We can't read /proc/self/exe for some reason, assume the name of the
-      // binary is unknown.
-      Report("WARNING: readlink(\"/proc/self/exe\") failed with errno %d, "
-             "some stack frames may not be symbolized\n", readlink_error);
-      module_name_len = internal_snprintf(buf, buf_len, "/proc/self/exe");
-    }
+  bool IsErr = internal_iserror(module_name_len, &readlink_error);
+#endif
+  if (IsErr) {
+    // We can't read /proc/self/exe for some reason, assume the name of the
+    // binary is unknown.
+    Report("WARNING: readlink(\"/proc/self/exe\") failed with errno %d, "
+           "some stack frames may not be symbolized\n", readlink_error);
+    module_name_len = internal_snprintf(buf, buf_len, "/proc/self/exe");
     CHECK_LT(module_name_len, buf_len);
-    buf[module_name_len] = '\0';
   }
   return module_name_len;
 }
diff --git a/lib/sanitizer_common/sanitizer_linux_libcdep.cc b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
index 396a0a8..92e6b78 100644
--- a/lib/sanitizer_common/sanitizer_linux_libcdep.cc
+++ b/lib/sanitizer_common/sanitizer_linux_libcdep.cc
@@ -302,11 +302,11 @@
 static atomic_uintptr_t kThreadDescriptorSize;
 
 uptr ThreadDescriptorSize() {
-  char buf[64];
   uptr val = atomic_load(&kThreadDescriptorSize, memory_order_relaxed);
   if (val)
     return val;
 #ifdef _CS_GNU_LIBC_VERSION
+  char buf[64];
   uptr len = confstr(_CS_GNU_LIBC_VERSION, buf, sizeof(buf));
   if (len < sizeof(buf) && internal_strncmp(buf, "glibc 2.", 8) == 0) {
     char *end;
@@ -507,7 +507,8 @@
     if (phdr->p_type == PT_LOAD) {
       uptr cur_beg = info->dlpi_addr + phdr->p_vaddr;
       uptr cur_end = cur_beg + phdr->p_memsz;
-      cur_module->addAddressRange(cur_beg, cur_end);
+      bool executable = phdr->p_flags & PF_X;
+      cur_module->addAddressRange(cur_beg, cur_end, executable);
     }
   }
   return 0;
diff --git a/lib/sanitizer_common/sanitizer_platform_interceptors.h b/lib/sanitizer_common/sanitizer_platform_interceptors.h
index a51a00c..e9d5c35 100644
--- a/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -75,11 +75,11 @@
 #define SANITIZER_INTERCEPT_STRPTIME SI_NOT_WINDOWS
 
 #define SANITIZER_INTERCEPT_SCANF SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_ISOC99_SCANF SI_LINUX
+#define SANITIZER_INTERCEPT_ISOC99_SCANF SI_LINUX_NOT_ANDROID
 
 #ifndef SANITIZER_INTERCEPT_PRINTF
 # define SANITIZER_INTERCEPT_PRINTF SI_NOT_WINDOWS
-# define SANITIZER_INTERCEPT_ISOC99_PRINTF SI_LINUX
+# define SANITIZER_INTERCEPT_ISOC99_PRINTF SI_LINUX_NOT_ANDROID
 #endif
 
 #define SANITIZER_INTERCEPT_FREXP 1
@@ -88,10 +88,10 @@
 #define SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS \
   SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_GETPWENT SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_GETPWENT SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_GETPWENT_R SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SETPWENT SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_SETPWENT SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_CLOCK_GETTIME SI_LINUX
 #define SANITIZER_INTERCEPT_GETITIMER SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_TIME SI_NOT_WINDOWS
@@ -104,9 +104,12 @@
 #define SANITIZER_INTERCEPT_GETSOCKNAME SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_GETHOSTBYNAME SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_GETHOSTBYNAME_R SI_LINUX
+#define SANITIZER_INTERCEPT_GETHOSTBYNAME2_R SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_GETHOSTBYADDR_R SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_GETHOSTENT_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_GETSOCKOPT SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_ACCEPT SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_ACCEPT4 SI_LINUX
+#define SANITIZER_INTERCEPT_ACCEPT4 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_MODF SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_RECVMSG SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_GETPEERNAME SI_NOT_WINDOWS
@@ -119,13 +122,13 @@
    (defined(__i386) || defined (__x86_64))  // NOLINT
 #define SANITIZER_INTERCEPT_SETLOCALE SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_GETCWD SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME SI_LINUX
+#define SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_STRTOIMAX SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_MBSTOWCS SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_MBSNRTOWCS SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_WCSTOMBS SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_WCSNRTOMBS SI_MAC || SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_TCGETATTR SI_LINUX
+#define SANITIZER_INTERCEPT_TCGETATTR SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_REALPATH SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_CONFSTR SI_MAC || SI_LINUX_NOT_ANDROID
@@ -142,19 +145,20 @@
 #define SANITIZER_INTERCEPT_SIGWAIT SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_SIGWAITINFO SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_SIGSETOPS SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_SIGSETOPS SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SIGPENDING SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_SIGPROCMASK SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_BACKTRACE SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_GETMNTENT SI_LINUX
 #define SANITIZER_INTERCEPT_GETMNTENT_R SI_LINUX_NOT_ANDROID
-#define SANITIZER_INTERCEPT_STATFS SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_STATFS SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_STATFS64 \
   (SI_MAC && !SI_IOS) || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_STATVFS SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_STATVFS64 SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_INITGROUPS SI_NOT_WINDOWS
-#define SANITIZER_INTERCEPT_ETHER SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_ETHER_NTOA_ATON SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_ETHER_HOST SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_ETHER_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_SHMCTL \
   (SI_LINUX_NOT_ANDROID && SANITIZER_WORDSIZE == 64)
@@ -163,6 +167,19 @@
 #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED \
   SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_PTHREAD_ATTR_GETAFFINITY_NP SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL \
+  SI_MAC || SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING \
+  SI_MAC || SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETKIND_NP SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETPSHARED SI_NOT_WINDOWS
+#define SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETCLOCK SI_LINUX_NOT_ANDROID
+#define SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GETPSHARED SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_TMPNAM SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_TMPNAM_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_TEMPNAM SI_NOT_WINDOWS
@@ -170,6 +187,7 @@
 #define SANITIZER_INTERCEPT_REMQUO SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_LGAMMA SI_NOT_WINDOWS
 #define SANITIZER_INTERCEPT_LGAMMA_R SI_LINUX
+#define SANITIZER_INTERCEPT_LGAMMAL_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_DRAND48_R SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_RAND_R SI_MAC || SI_LINUX_NOT_ANDROID
 #define SANITIZER_INTERCEPT_ICONV SI_LINUX_NOT_ANDROID
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_linux.cc b/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
index 66c6ab9..92353e4 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_linux.cc
@@ -29,7 +29,7 @@
 // are not defined anywhere in userspace headers. Fake them. This seems to work
 // fine with newer headers, too.
 #include <asm/posix_types.h>
-#if defined(__x86_64__)
+#if defined(__x86_64__) ||  defined(__mips__)
 #include <sys/stat.h>
 #else
 #define ino_t __kernel_ino_t
@@ -50,21 +50,19 @@
 
 #include <linux/aio_abi.h>
 
-#if SANITIZER_ANDROID
-#include <asm/statfs.h>
-#else
-#include <sys/statfs.h>
-#endif
-
 #if !SANITIZER_ANDROID
+#include <sys/statfs.h>
 #include <linux/perf_event.h>
 #endif
 
 namespace __sanitizer {
+#if !SANITIZER_ANDROID
   unsigned struct_statfs64_sz = sizeof(struct statfs64);
+#endif
 }  // namespace __sanitizer
 
-#if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)
+#if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\
+                            && !defined(__mips__)
 COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat));
 #endif
 
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
index 4f7c19d..29fea6e 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -191,13 +191,14 @@
   unsigned struct_tms_sz = sizeof(struct tms);
   unsigned struct_sigevent_sz = sizeof(struct sigevent);
   unsigned struct_sched_param_sz = sizeof(struct sched_param);
-  unsigned struct_statfs_sz = sizeof(struct statfs);
+
 
 #if SANITIZER_MAC && !SANITIZER_IOS
   unsigned struct_statfs64_sz = sizeof(struct statfs64);
 #endif // SANITIZER_MAC && !SANITIZER_IOS
 
 #if !SANITIZER_ANDROID
+  unsigned struct_statfs_sz = sizeof(struct statfs);
   unsigned struct_sockaddr_sz = sizeof(struct sockaddr);
   unsigned ucontext_t_sz = sizeof(ucontext_t);
 #endif // !SANITIZER_ANDROID
@@ -289,6 +290,7 @@
   int ptrace_setfpregs = PTRACE_SETFPREGS;
   int ptrace_getfpxregs = PTRACE_GETFPXREGS;
   int ptrace_setfpxregs = PTRACE_SETFPXREGS;
+  int ptrace_geteventmsg = PTRACE_GETEVENTMSG;
 #if (defined(PTRACE_GETSIGINFO) && defined(PTRACE_SETSIGINFO)) ||              \
     (defined(PT_GETSIGINFO) && defined(PT_SETSIGINFO))
   int ptrace_getsiginfo = PTRACE_GETSIGINFO;
@@ -1058,6 +1060,10 @@
 
 CHECK_TYPE_SIZE(clock_t);
 
+#if SANITIZER_LINUX
+CHECK_TYPE_SIZE(clockid_t);
+#endif
+
 #if !SANITIZER_ANDROID
 CHECK_TYPE_SIZE(ifaddrs);
 CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_next);
diff --git a/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index 492daf2..8ec1c58 100644
--- a/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -39,11 +39,11 @@
   extern unsigned struct_itimerspec_sz;
   extern unsigned struct_sigevent_sz;
   extern unsigned struct_sched_param_sz;
-  extern unsigned struct_statfs_sz;
   extern unsigned struct_statfs64_sz;
-  extern unsigned struct_sockaddr_sz;
 
 #if !SANITIZER_ANDROID
+  extern unsigned struct_statfs_sz;
+  extern unsigned struct_sockaddr_sz;
   extern unsigned ucontext_t_sz;
 #endif // !SANITIZER_ANDROID
 
@@ -67,6 +67,13 @@
 #elif defined(__powerpc64__)
   const unsigned struct_kernel_stat_sz = 144;
   const unsigned struct_kernel_stat64_sz = 104;
+#elif defined(__mips__)
+  #if SANITIZER_WORDSIZE == 64
+  const unsigned struct_kernel_stat_sz = 216;
+  #else
+  const unsigned struct_kernel_stat_sz = 144;
+  #endif
+  const unsigned struct_kernel_stat64_sz = 104;
 #endif
   struct __sanitizer_perf_event_attr {
     unsigned type;
@@ -162,6 +169,12 @@
     unsigned __seq;
     u64 __unused1;
     u64 __unused2;
+#elif defined(__mips__)
+    unsigned int mode;
+    unsigned short __seq;
+    unsigned short __pad1;
+    unsigned long __unused1;
+    unsigned long __unused2;
 #else
     unsigned short mode;
     unsigned short __pad1;
@@ -190,15 +203,15 @@
     u64 shm_ctime;
   #else
     uptr shm_atime;
-  #ifndef _LP64
+  #if !defined(_LP64) && !defined(__mips__)
     uptr __unused1;
   #endif
     uptr shm_dtime;
-  #ifndef _LP64
+  #if !defined(_LP64) && !defined(__mips__)
     uptr __unused2;
   #endif
     uptr shm_ctime;
-  #ifndef _LP64
+  #if !defined(_LP64) && !defined(__mips__)
     uptr __unused3;
   #endif
   #endif
@@ -440,8 +453,13 @@
   typedef long __sanitizer_clock_t;
 #endif
 
+#if SANITIZER_LINUX
+  typedef int __sanitizer_clockid_t;
+#endif
+
 #if SANITIZER_LINUX || SANITIZER_FREEBSD
-#if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__)
+#if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__)\
+                   || defined(__mips__)
   typedef unsigned __sanitizer___kernel_uid_t;
   typedef unsigned __sanitizer___kernel_gid_t;
 #else
@@ -454,7 +472,7 @@
   typedef long __sanitizer___kernel_off_t;
 #endif
 
-#if defined(__powerpc__) || defined(__aarch64__)
+#if defined(__powerpc__) || defined(__aarch64__) || defined(__mips__)
   typedef unsigned int __sanitizer___kernel_old_uid_t;
   typedef unsigned int __sanitizer___kernel_old_gid_t;
 #else
@@ -494,6 +512,9 @@
 
   // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros.
   struct __sanitizer_sigaction {
+#if defined(__mips__) && !SANITIZER_FREEBSD
+    unsigned int sa_flags;
+#endif
     union {
       void (*sigaction)(int sig, void *siginfo, void *uctx);
       void (*handler)(int sig);
@@ -503,11 +524,16 @@
     __sanitizer_sigset_t sa_mask;
 #else
     __sanitizer_sigset_t sa_mask;
+#ifndef __mips__
     int sa_flags;
 #endif
+#endif
 #if SANITIZER_LINUX
     void (*sa_restorer)();
 #endif
+#if defined(__mips__) && (SANITIZER_WORDSIZE == 32)
+    int sa_resv[1];
+#endif
   };
 
 #if SANITIZER_FREEBSD
@@ -678,6 +704,7 @@
   extern int ptrace_setsiginfo;
   extern int ptrace_getregset;
   extern int ptrace_setregset;
+  extern int ptrace_geteventmsg;
 #endif
 
 #if (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID
@@ -720,7 +747,7 @@
 
 #define IOC_NRBITS 8
 #define IOC_TYPEBITS 8
-#if defined(__powerpc__) || defined(__powerpc64__)
+#if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__)
 #define IOC_SIZEBITS 13
 #define IOC_DIRBITS 3
 #define IOC_NONE 1U
diff --git a/lib/sanitizer_common/sanitizer_procmaps_linux.cc b/lib/sanitizer_common/sanitizer_procmaps_linux.cc
index 4e8bf10..c647765 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_linux.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_linux.cc
@@ -244,12 +244,12 @@
                                             uptr max_modules,
                                             string_predicate_t filter) {
   Reset();
-  uptr cur_beg, cur_end, cur_offset;
+  uptr cur_beg, cur_end, cur_offset, prot;
   InternalScopedBuffer<char> module_name(kMaxPathLength);
   uptr n_modules = 0;
   for (uptr i = 0; n_modules < max_modules &&
                        Next(&cur_beg, &cur_end, &cur_offset, module_name.data(),
-                            module_name.size(), 0);
+                            module_name.size(), &prot);
        i++) {
     const char *cur_name = module_name.data();
     if (cur_name[0] == '\0')
@@ -270,7 +270,7 @@
     //   first entry.
     uptr base_address = (i ? cur_beg : 0) - cur_offset;
     LoadedModule *cur_module = new(mem) LoadedModule(cur_name, base_address);
-    cur_module->addAddressRange(cur_beg, cur_end);
+    cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
     n_modules++;
   }
   return n_modules;
diff --git a/lib/sanitizer_common/sanitizer_procmaps_mac.cc b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
index 1eb02ab..074b91a 100644
--- a/lib/sanitizer_common/sanitizer_procmaps_mac.cc
+++ b/lib/sanitizer_common/sanitizer_procmaps_mac.cc
@@ -75,14 +75,16 @@
 bool MemoryMappingLayout::NextSegmentLoad(
     uptr *start, uptr *end, uptr *offset,
     char filename[], uptr filename_size, uptr *protection) {
-  if (protection)
-    UNIMPLEMENTED();
   const char* lc = current_load_cmd_addr_;
   current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize;
   if (((const load_command *)lc)->cmd == kLCSegment) {
     const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_);
     const SegmentCommand* sc = (const SegmentCommand *)lc;
     if (start) *start = sc->vmaddr + dlloff;
+    if (protection) {
+      // Return the initial protection.
+      *protection = sc->initprot;
+    }
     if (end) *end = sc->vmaddr + sc->vmsize + dlloff;
     if (offset) {
       if (current_filetype_ == /*MH_EXECUTE*/ 0x2) {
@@ -157,12 +159,12 @@
                                             uptr max_modules,
                                             string_predicate_t filter) {
   Reset();
-  uptr cur_beg, cur_end;
+  uptr cur_beg, cur_end, prot;
   InternalScopedBuffer<char> module_name(kMaxPathLength);
   uptr n_modules = 0;
   for (uptr i = 0; n_modules < max_modules &&
                        Next(&cur_beg, &cur_end, 0, module_name.data(),
-                            module_name.size(), 0);
+                            module_name.size(), &prot);
        i++) {
     const char *cur_name = module_name.data();
     if (cur_name[0] == '\0')
@@ -178,7 +180,7 @@
       cur_module = new(mem) LoadedModule(cur_name, cur_beg);
       n_modules++;
     }
-    cur_module->addAddressRange(cur_beg, cur_end);
+    cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
   }
   return n_modules;
 }
diff --git a/lib/sanitizer_common/sanitizer_report_decorator.h b/lib/sanitizer_common/sanitizer_report_decorator.h
index 6e5b0ed..86536aa 100644
--- a/lib/sanitizer_common/sanitizer_report_decorator.h
+++ b/lib/sanitizer_common/sanitizer_report_decorator.h
@@ -20,12 +20,16 @@
 #include "sanitizer_common.h"
 
 namespace __sanitizer {
-class AnsiColorDecorator {
+class SanitizerCommonDecorator {
   // FIXME: This is not portable. It assumes the special strings are printed to
   // stdout, which is not the case on Windows (see SetConsoleTextAttribute()).
  public:
-  explicit AnsiColorDecorator(bool use_ansi_colors) : ansi_(use_ansi_colors) { }
+  SanitizerCommonDecorator() : ansi_(ColorizeReports()) {}
   const char *Bold()    const { return ansi_ ? "\033[1m" : ""; }
+  const char *Default() const { return ansi_ ? "\033[1m\033[0m"  : ""; }
+  const char *Warning()    const { return Red(); }
+  const char *EndWarning() const { return Default(); }
+ protected:
   const char *Black()   const { return ansi_ ? "\033[1m\033[30m" : ""; }
   const char *Red()     const { return ansi_ ? "\033[1m\033[31m" : ""; }
   const char *Green()   const { return ansi_ ? "\033[1m\033[32m" : ""; }
@@ -34,19 +38,10 @@
   const char *Magenta() const { return ansi_ ? "\033[1m\033[35m" : ""; }
   const char *Cyan()    const { return ansi_ ? "\033[1m\033[36m" : ""; }
   const char *White()   const { return ansi_ ? "\033[1m\033[37m" : ""; }
-  const char *Default() const { return ansi_ ? "\033[1m\033[0m"  : ""; }
  private:
   bool ansi_;
 };
 
-class SanitizerCommonDecorator: protected AnsiColorDecorator {
- public:
-  SanitizerCommonDecorator()
-      : __sanitizer::AnsiColorDecorator(ColorizeReports()) { }
-  const char *Warning()    { return Red(); }
-  const char *EndWarning() { return Default(); }
-};
-
 }  // namespace __sanitizer
 
 #endif  // SANITIZER_REPORT_DECORATOR_H
diff --git a/lib/sanitizer_common/sanitizer_stackdepot.cc b/lib/sanitizer_common/sanitizer_stackdepot.cc
index da5bd4a..5b70dfc 100644
--- a/lib/sanitizer_common/sanitizer_stackdepot.cc
+++ b/lib/sanitizer_common/sanitizer_stackdepot.cc
@@ -50,7 +50,10 @@
   uptr size;
   uptr stack[1];  // [size]
 
-  static const u32 kUseCountBits = 20;
+  static const u32 kTabSizeLog = 20;
+  // Lower kTabSizeLog bits are equal for all items in one bucket.
+  // We use these bits to store the per-stack use counter.
+  static const u32 kUseCountBits = kTabSizeLog;
   static const u32 kMaxUseCount = 1 << kUseCountBits;
   static const u32 kUseCountMask = (1 << kUseCountBits) - 1;
   static const u32 kHashMask = ~kUseCountMask;
@@ -100,7 +103,8 @@
 uptr *StackDepotHandle::stack() { return &node_->stack[0]; }
 
 // FIXME(dvyukov): this single reserved bit is used in TSan.
-typedef StackDepotBase<StackDepotNode, 1> StackDepot;
+typedef StackDepotBase<StackDepotNode, 1, StackDepotNode::kTabSizeLog>
+    StackDepot;
 static StackDepot theDepot;
 
 StackDepotStats *StackDepotGetStats() {
diff --git a/lib/sanitizer_common/sanitizer_stackdepotbase.h b/lib/sanitizer_common/sanitizer_stackdepotbase.h
index 07a973c..b4fa875 100644
--- a/lib/sanitizer_common/sanitizer_stackdepotbase.h
+++ b/lib/sanitizer_common/sanitizer_stackdepotbase.h
@@ -20,7 +20,7 @@
 
 namespace __sanitizer {
 
-template <class Node, int kReservedBits>
+template <class Node, int kReservedBits, int kTabSizeLog>
 class StackDepotBase {
  public:
   typedef typename Node::args_type args_type;
@@ -37,7 +37,7 @@
   static Node *lock(atomic_uintptr_t *p);
   static void unlock(atomic_uintptr_t *p, Node *s);
 
-  static const int kTabSize = 1024 * 1024;  // Hash table size.
+  static const int kTabSize = 1 << kTabSizeLog;  // Hash table size.
   static const int kPartBits = 8;
   static const int kPartShift = sizeof(u32) * 8 - kPartBits - kReservedBits;
   static const int kPartCount =
@@ -53,9 +53,10 @@
   friend class StackDepotReverseMap;
 };
 
-template <class Node, int kReservedBits>
-Node *StackDepotBase<Node, kReservedBits>::find(Node *s, args_type args,
-                                                u32 hash) {
+template <class Node, int kReservedBits, int kTabSizeLog>
+Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::find(Node *s,
+                                                             args_type args,
+                                                             u32 hash) {
   // Searches linked list s for the stack, returns its id.
   for (; s; s = s->link) {
     if (s->eq(hash, args)) {
@@ -65,8 +66,9 @@
   return 0;
 }
 
-template <class Node, int kReservedBits>
-Node *StackDepotBase<Node, kReservedBits>::lock(atomic_uintptr_t *p) {
+template <class Node, int kReservedBits, int kTabSizeLog>
+Node *StackDepotBase<Node, kReservedBits, kTabSizeLog>::lock(
+    atomic_uintptr_t *p) {
   // Uses the pointer lsb as mutex.
   for (int i = 0;; i++) {
     uptr cmp = atomic_load(p, memory_order_relaxed);
@@ -80,15 +82,17 @@
   }
 }
 
-template <class Node, int kReservedBits>
-void StackDepotBase<Node, kReservedBits>::unlock(atomic_uintptr_t *p, Node *s) {
+template <class Node, int kReservedBits, int kTabSizeLog>
+void StackDepotBase<Node, kReservedBits, kTabSizeLog>::unlock(
+    atomic_uintptr_t *p, Node *s) {
   DCHECK_EQ((uptr)s & 1, 0);
   atomic_store(p, (uptr)s, memory_order_release);
 }
 
-template <class Node, int kReservedBits>
-typename StackDepotBase<Node, kReservedBits>::handle_type
-StackDepotBase<Node, kReservedBits>::Put(args_type args, bool *inserted) {
+template <class Node, int kReservedBits, int kTabSizeLog>
+typename StackDepotBase<Node, kReservedBits, kTabSizeLog>::handle_type
+StackDepotBase<Node, kReservedBits, kTabSizeLog>::Put(args_type args,
+                                                      bool *inserted) {
   if (inserted) *inserted = false;
   if (!args.is_valid()) return handle_type();
   uptr h = args.hash();
@@ -125,9 +129,9 @@
   return s->get_handle();
 }
 
-template <class Node, int kReservedBits>
-typename StackDepotBase<Node, kReservedBits>::args_type
-StackDepotBase<Node, kReservedBits>::Get(u32 id) {
+template <class Node, int kReservedBits, int kTabSizeLog>
+typename StackDepotBase<Node, kReservedBits, kTabSizeLog>::args_type
+StackDepotBase<Node, kReservedBits, kTabSizeLog>::Get(u32 id) {
   if (id == 0) {
     return args_type();
   }
@@ -149,5 +153,5 @@
   return args_type();
 }
 
-} // namespace __sanitizer
+}  // namespace __sanitizer
 #endif  // SANITIZER_STACKDEPOTBASE_H
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.cc b/lib/sanitizer_common/sanitizer_stacktrace.cc
index c608fbb..3539639 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.cc
+++ b/lib/sanitizer_common/sanitizer_stacktrace.cc
@@ -18,11 +18,14 @@
 namespace __sanitizer {
 
 uptr StackTrace::GetPreviousInstructionPc(uptr pc) {
-#ifdef __arm__
+#if defined(__arm__)
   // Cancel Thumb bit.
   pc = pc & (~1);
 #endif
-#if defined(__sparc__)
+#if defined(__powerpc__) || defined(__powerpc64__)
+  // PCs are always 4 byte aligned.
+  return pc - 4;
+#elif defined(__sparc__)
   return pc - 8;
 #else
   return pc - 1;
diff --git a/lib/sanitizer_common/sanitizer_stacktrace.h b/lib/sanitizer_common/sanitizer_stacktrace.h
index c3ba193..fcaa777 100644
--- a/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -52,9 +52,12 @@
 
   static bool WillUseFastUnwind(bool request_fast_unwind) {
     // Check if fast unwind is available. Fast unwind is the only option on Mac.
+    // It is also the only option on FreeBSD as the slow unwinding that
+    // leverages _Unwind_Backtrace() yields the call stack of the signal's
+    // handler and not of the code that raised the signal (as it does on Linux).
     if (!SANITIZER_CAN_FAST_UNWIND)
       return false;
-    else if (SANITIZER_MAC)
+    else if (SANITIZER_MAC != 0 || SANITIZER_FREEBSD != 0)
       return true;
     return request_fast_unwind;
   }
diff --git a/lib/sanitizer_common/sanitizer_tls_get_addr.cc b/lib/sanitizer_common/sanitizer_tls_get_addr.cc
index 42d7d1a..6142ce5 100644
--- a/lib/sanitizer_common/sanitizer_tls_get_addr.cc
+++ b/lib/sanitizer_common/sanitizer_tls_get_addr.cc
@@ -78,14 +78,13 @@
   DTLS_Deallocate(dtls.dtv, s);
 }
 
-void DTLS_on_tls_get_addr(void *arg_void, void *res) {
-  if (!common_flags()->intercept_tls_get_addr) return;
+DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res) {
+  if (!common_flags()->intercept_tls_get_addr) return 0;
   TlsGetAddrParam *arg = reinterpret_cast<TlsGetAddrParam *>(arg_void);
   uptr dso_id = arg->dso_id;
-  if (dtls.dtv_size == kDestroyedThread) return;
+  if (dtls.dtv_size == kDestroyedThread) return 0;
   DTLS_Resize(dso_id + 1);
-  if (dtls.dtv[dso_id].beg)
-    return;
+  if (dtls.dtv[dso_id].beg) return 0;
   uptr tls_size = 0;
   uptr tls_beg = reinterpret_cast<uptr>(res) - arg->offset;
   VPrintf(2, "__tls_get_addr: %p {%p,%p} => %p; tls_beg: %p; sp: %p "
@@ -110,6 +109,7 @@
   }
   dtls.dtv[dso_id].beg = tls_beg;
   dtls.dtv[dso_id].size = tls_size;
+  return dtls.dtv + dso_id;
 }
 
 void DTLS_on_libc_memalign(void *ptr, uptr size) {
@@ -123,7 +123,7 @@
 
 #else
 void DTLS_on_libc_memalign(void *ptr, uptr size) {}
-void DTLS_on_tls_get_addr(void *arg, void *res) {}
+DTLS::DTV *DTLS_on_tls_get_addr(void *arg, void *res) { return 0; }
 DTLS *DTLS_Get() { return 0; }
 void DTLS_Destroy() {}
 #endif  // SANITIZER_INTERCEPT_TLS_GET_ADDR
diff --git a/lib/sanitizer_common/sanitizer_tls_get_addr.h b/lib/sanitizer_common/sanitizer_tls_get_addr.h
index a64f11e..0fc9a22 100644
--- a/lib/sanitizer_common/sanitizer_tls_get_addr.h
+++ b/lib/sanitizer_common/sanitizer_tls_get_addr.h
@@ -48,7 +48,9 @@
   uptr last_memalign_ptr;
 };
 
-void DTLS_on_tls_get_addr(void *arg, void *res);
+// Returns the DTV entry (beg, size) of a linker-allocated TLS block, or 0.
+// Each block is returned exactly once; later calls for it return 0.
+DTLS::DTV *DTLS_on_tls_get_addr(void *arg, void *res);
 void DTLS_on_libc_memalign(void *ptr, uptr size);
 DTLS *DTLS_Get();
 void DTLS_Destroy();  // Make sure to call this before the thread is destroyed.
diff --git a/lib/sanitizer_common/sanitizer_win.cc b/lib/sanitizer_common/sanitizer_win.cc
index 697c59f..da24df6 100644
--- a/lib/sanitizer_common/sanitizer_win.cc
+++ b/lib/sanitizer_common/sanitizer_win.cc
@@ -137,6 +137,10 @@
   UNIMPLEMENTED();
 }
 
+void *MapWritableFileToMemory(void *addr, uptr size, uptr fd, uptr offset) {
+  UNIMPLEMENTED();
+}
+
 static const int kMaxEnvNameLength = 128;
 static const DWORD kMaxEnvValueLength = 32767;
 
@@ -352,6 +356,14 @@
   ExitProcess(exitcode);
 }
 
+uptr internal_ftruncate(fd_t fd, uptr size) {
+  UNIMPLEMENTED();
+}
+
+uptr internal_rename(const char *oldpath, const char *newpath) {
+  UNIMPLEMENTED();
+}
+
 // ---------------------- BlockingMutex ---------------- {{{1
 const uptr LOCK_UNINITIALIZED = 0;
 const uptr LOCK_READY = (uptr)-1;
diff --git a/lib/sanitizer_common/scripts/check_lint.sh b/lib/sanitizer_common/scripts/check_lint.sh
index 0b7aea1..33ab883 100755
--- a/lib/sanitizer_common/scripts/check_lint.sh
+++ b/lib/sanitizer_common/scripts/check_lint.sh
@@ -54,8 +54,7 @@
 }
 
 run_lint ${LLVM_LINT_FILTER} --filter=${LLVM_LINT_FILTER} \
-  lib/Transforms/Instrumentation/*Sanitizer.cpp \
-  lib/Transforms/Utils/SpecialCaseList.cpp &
+  lib/Transforms/Instrumentation/*Sanitizer.cpp &
 
 if [ "${COMPILER_RT}" == "" ]; then
   COMPILER_RT=projects/compiler-rt
diff --git a/lib/sanitizer_common/scripts/sancov.py b/lib/sanitizer_common/scripts/sancov.py
index dfb65b2..4769530 100755
--- a/lib/sanitizer_common/scripts/sancov.py
+++ b/lib/sanitizer_common/scripts/sancov.py
@@ -15,7 +15,8 @@
   print >> sys.stderr, "Usage: \n" + \
       " " + prog_name + " merge file1 [file2 ...]  > output\n" \
       " " + prog_name + " print file1 [file2 ...]\n" \
-      " " + prog_name + " unpack file1 [file2 ...]\n"
+      " " + prog_name + " unpack file1 [file2 ...]\n" \
+      " " + prog_name + " rawunpack file1 [file2 ...]\n"
   exit(1)
 
 def ReadOneFile(path):
@@ -83,15 +84,13 @@
     bits = int(f_map.readline())
     for line in f_map:
       parts = line.rstrip().split()
-      assert len(parts) == 4
       mem_map.append((int(parts[0], 16),
                   int(parts[1], 16),
                   int(parts[2], 16),
-                  parts[3]))
+                  ' '.join(parts[3:])))
   mem_map.sort(key=lambda m : m[0])
   mem_map_keys = [m[0] for m in mem_map]
 
-  print mem_map
   with open(path, mode="rb") as f:
     print >> sys.stderr, "%s: unpacking %s" % (prog_name, path)
 
@@ -109,8 +108,6 @@
       if pc == 0: continue
       map_idx = bisect.bisect(mem_map_keys, pc) - 1
       (start, end, base, module_path) = mem_map[map_idx]
-      print pc
-      print start, end, base, module_path
       assert pc >= start
       if pc >= end:
         print >> sys.stderr, "warning: %s: pc %x outside of any known mapping" % (prog_name, pc)
diff --git a/lib/sanitizer_common/tests/CMakeLists.txt b/lib/sanitizer_common/tests/CMakeLists.txt
index 331117b..cbc0c25 100644
--- a/lib/sanitizer_common/tests/CMakeLists.txt
+++ b/lib/sanitizer_common/tests/CMakeLists.txt
@@ -34,35 +34,21 @@
 endforeach()
 
 set(SANITIZER_TEST_CFLAGS_COMMON
+  ${COMPILER_RT_TEST_CFLAGS}
   ${COMPILER_RT_GTEST_CFLAGS}
   -I${COMPILER_RT_SOURCE_DIR}/include
   -I${COMPILER_RT_SOURCE_DIR}/lib
   -I${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common
-  -DGTEST_HAS_RTTI=0
+  -fno-rtti
   -O2
   -Werror=sign-compare
   -Wno-non-virtual-dtor)
 
-append_if(COMPILER_RT_HAS_G_FLAG -g SANITIZER_TEST_CFLAGS_COMMON)
-append_if(COMPILER_RT_HAS_Zi_FLAG -Zi SANITIZER_TEST_CFLAGS_COMMON)
-
-append_if(COMPILER_RT_HAS_FNO_RTTI_FLAG -fno-rtti SANITIZER_TEST_CFLAGS_COMMON)
-append_if(COMPILER_RT_HAS_GR_FLAG -GR- SANITIZER_TEST_CFLAGS_COMMON)
-
-if(MSVC)
-  # System headers and gtest use a lot of deprecated stuff.
-  list(APPEND SANITIZER_TEST_CFLAGS_COMMON
-       -Wno-deprecated-declarations)
-
-  # clang-cl doesn't support exceptions yet.
-  list(APPEND SANITIZER_TEST_CFLAGS_COMMON
-       /fallback
-       -D_HAS_EXCEPTIONS=0)
-
-  # We should teach clang-cl to understand more pragmas.
-  list(APPEND SANITIZER_TEST_CFLAGS_COMMON
-       -Wno-unknown-pragmas
-       -Wno-undefined-inline)
+# -gline-tables-only must be enough for these tests, so use it if possible.
+if(COMPILER_RT_TEST_COMPILER_ID MATCHES "Clang")
+  list(APPEND SANITIZER_TEST_CFLAGS_COMMON -gline-tables-only)
+else()
+  list(APPEND SANITIZER_TEST_CFLAGS_COMMON -g)
 endif()
 
 if(NOT MSVC)
@@ -70,10 +56,11 @@
 endif()
 
 append_if(COMPILER_RT_HAS_LIBDL -ldl SANITIZER_TEST_LINK_FLAGS_COMMON)
-append_if(COMPILER_RT_HAS_LIBPTHREAD -lpthread SANITIZER_TEST_LINK_FLAGS_COMMON)
-# x86_64 FreeBSD 9.2 additionally requires libc++ to build the tests.
+append_if(COMPILER_RT_HAS_LIBPTHREAD -pthread SANITIZER_TEST_LINK_FLAGS_COMMON)
+# x86_64 FreeBSD 9.2 additionally requires libc++ to build the tests. Also,
+# 'libm' shall be specified explicitly to build i386 tests.
 if(CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE")
-  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON "-lc++")
+  list(APPEND SANITIZER_TEST_LINK_FLAGS_COMMON -lc++ -lm)
 endif()
 
 include_directories(..)
diff --git a/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc b/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc
index ac19dcf..8c83633 100644
--- a/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc
@@ -268,9 +268,14 @@
   }
   EXPECT_EQ(d.testOnlyGetEpoch(), 4 * d.size());
 
+#if TSAN_DEBUG == 0
+  // EXPECT_DEATH clones a thread with a 4K stack,
+  // which is overflowed by tsan memory access functions in debug mode.
+
   // Can not handle the locks from the previous epoch.
   // The caller should update the lock id.
   EXPECT_DEATH(d.onLock(&dtls, l0), "CHECK failed.*current_epoch_");
+#endif
 }
 
 TEST(DeadlockDetector, MultipleEpochsTest) {
diff --git a/lib/sanitizer_common/tests/sanitizer_flags_test.cc b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
index 833816d..1055f5d 100644
--- a/lib/sanitizer_common/tests/sanitizer_flags_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_flags_test.cc
@@ -49,7 +49,7 @@
 
 TEST(SanitizerCommon, IntFlags) {
   TestFlag(-11, 0, -11);
-  TestFlag(-11, "flag_name", 0);
+  TestFlag(-11, "flag_name", -11);
   TestFlag(-11, "--flag_name=", 0);
   TestFlag(-11, "--flag_name=42", 42);
   TestFlag(-11, "--flag_name=-42", -42);
@@ -57,7 +57,7 @@
 
 TEST(SanitizerCommon, StrFlags) {
   TestStrFlag("zzz", 0, "zzz");
-  TestStrFlag("zzz", "flag_name", "");
+  TestStrFlag("zzz", "flag_name", "zzz");
   TestStrFlag("zzz", "--flag_name=", "");
   TestStrFlag("", "--flag_name=abc", "abc");
   TestStrFlag("", "--flag_name='abc zxc'", "abc zxc");
diff --git a/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc b/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc
index 4a32af5..22fa522 100644
--- a/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc
+++ b/lib/sanitizer_common/tests/sanitizer_ioctl_test.cc
@@ -75,4 +75,29 @@
   EXPECT_EQ(EVIOCGKEY(0), desc->req);
 }
 
+// Test decoding KVM ioctl numbers.
+TEST(SanitizerIoctl, KVM_GET_MP_STATE) {
+  ioctl_desc desc;
+  bool res = ioctl_decode(0x8004ae98U, &desc);
+  EXPECT_TRUE(res);
+  EXPECT_EQ(ioctl_desc::WRITE, desc.type);
+  EXPECT_EQ(4U, desc.size);
+}
+
+TEST(SanitizerIoctl, KVM_GET_LAPIC) {
+  ioctl_desc desc;
+  bool res = ioctl_decode(0x8400ae8eU, &desc);
+  EXPECT_TRUE(res);
+  EXPECT_EQ(ioctl_desc::WRITE, desc.type);
+  EXPECT_EQ(1024U, desc.size);
+}
+
+TEST(SanitizerIoctl, KVM_GET_MSR_INDEX_LIST) {
+  ioctl_desc desc;
+  bool res = ioctl_decode(0xc004ae02U, &desc);
+  EXPECT_TRUE(res);
+  EXPECT_EQ(ioctl_desc::READWRITE, desc.type);
+  EXPECT_EQ(4U, desc.size);
+}
+
 #endif // SANITIZER_LINUX
diff --git a/lib/tsan/CMakeLists.txt b/lib/tsan/CMakeLists.txt
index 3a71e9a..19efb54 100644
--- a/lib/tsan/CMakeLists.txt
+++ b/lib/tsan/CMakeLists.txt
@@ -38,6 +38,7 @@
   rtl/tsan_rtl_mutex.cc
   rtl/tsan_rtl_report.cc
   rtl/tsan_rtl_thread.cc
+  rtl/tsan_stack_trace.cc
   rtl/tsan_stat.cc
   rtl/tsan_suppressions.cc
   rtl/tsan_symbolize.cc
@@ -54,6 +55,7 @@
 set(TSAN_HEADERS
   rtl/tsan_clock.h
   rtl/tsan_defs.h
+  rtl/tsan_dense_alloc.h
   rtl/tsan_fd.h
   rtl/tsan_flags.h
   rtl/tsan_ignoreset.h
@@ -67,6 +69,7 @@
   rtl/tsan_platform.h
   rtl/tsan_report.h
   rtl/tsan_rtl.h
+  rtl/tsan_stack_trace.h
   rtl/tsan_stat.h
   rtl/tsan_suppressions.h
   rtl/tsan_symbolize.h
@@ -101,7 +104,7 @@
 
 # Build libcxx instrumented with TSan.
 if(COMPILER_RT_HAS_LIBCXX_SOURCES AND
-   COMPILER_RT_TEST_COMPILER STREQUAL "Clang")
+   COMPILER_RT_TEST_COMPILER_ID STREQUAL "Clang")
   set(LIBCXX_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libcxx_tsan)
   add_custom_libcxx(libcxx_tsan ${LIBCXX_PREFIX}
     DEPS ${TSAN_RUNTIME_LIBRARIES}
diff --git a/lib/tsan/check_analyze.sh b/lib/tsan/check_analyze.sh
index 39d570b..08bfc7a 100755
--- a/lib/tsan/check_analyze.sh
+++ b/lib/tsan/check_analyze.sh
@@ -8,11 +8,11 @@
 
 PrintRes
 
-mops="write1 \
+wmops="write1 \
       write2 \
       write4 \
-      write8 \
-      read1 \
+      write8"
+rmops="read1 \
       read2 \
       read4 \
       read8"
@@ -27,10 +27,16 @@
   fi
 }
 
-for f in $mops; do
-  check $f rsp 1   # To read caller pc.
-  check $f push 0
-  check $f pop 0
+for f in $wmops; do
+  check $f rsp 3
+  check $f push 1
+  check $f pop 5
+done
+
+for f in $rmops; do
+  check $f rsp 3
+  check $f push 1
+  check $f pop 4
 done
 
 for f in $func; do
diff --git a/lib/tsan/check_memcpy.sh b/lib/tsan/check_memcpy.sh
index fe3e49e..101df11 100755
--- a/lib/tsan/check_memcpy.sh
+++ b/lib/tsan/check_memcpy.sh
@@ -17,7 +17,14 @@
 $CXX $SRC $CFLAGS -c -o $OBJ
 $CXX $OBJ $LDFLAGS -o $EXE
 
-NCALL=$(objdump -d $EXE | egrep "callq .*__interceptor_mem(cpy|set)" | wc -l)
+NCALL=$(objdump -d $EXE | egrep "callq .*<__interceptor_mem(cpy|set)>" | wc -l)
+if [ "$NCALL" != "0" ]; then
+  echo FAIL: found $NCALL memcpy/memset calls
+  exit 1
+fi
+
+# tail calls
+NCALL=$(objdump -d $EXE | egrep "jmpq .*<__interceptor_mem(cpy|set)>" | wc -l)
 if [ "$NCALL" != "0" ]; then
   echo FAIL: found $NCALL memcpy/memset calls
   exit 1
diff --git a/lib/tsan/dd/CMakeLists.txt b/lib/tsan/dd/CMakeLists.txt
index a21e2dd..9328721 100644
--- a/lib/tsan/dd/CMakeLists.txt
+++ b/lib/tsan/dd/CMakeLists.txt
@@ -34,18 +34,15 @@
     CFLAGS ${DD_CFLAGS}
     DEFS ${DD_COMMON_DEFINITIONS})
 
-  add_library(RTDD OBJECT ${DD_SOURCES})
-  set_target_compile_flags(RTDD ${DD_CFLAGS})
-  set_property(TARGET RTDD APPEND PROPERTY
-    COMPILE_DEFINITIONS ${DD_COMMON_DEFINITIONS})
-  set_property(TARGET RTDD APPEND PROPERTY
-    COMPILE_DEFINITIONS ${DD_DYNAMIC_DEFINITIONS})
+  add_compiler_rt_object_library(RTDD ${arch}
+    SOURCES ${DD_SOURCES} CFLAGS ${DD_CFLAGS}
+    DEFS ${DD_COMMON_DEFINITIONS} ${DD_DYNAMIC_DEFINITIONS})
 
-  add_library(clang_rt.dyndd-${arch} SHARED
-    $<TARGET_OBJECTS:RTDD>
-    $<TARGET_OBJECTS:RTInterception.${arch}>
-    $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
-    $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>)
+  add_compiler_rt_runtime(clang_rt.dyndd-${arch} ${arch} SHARED
+    SOURCES $<TARGET_OBJECTS:RTDD.${arch}>
+            $<TARGET_OBJECTS:RTInterception.${arch}>
+            $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+            $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>)
   target_link_libraries(clang_rt.dyndd-${arch} pthread dl)
 endif()
 
diff --git a/lib/tsan/go/build.bat b/lib/tsan/go/build.bat
index bc56784..4480e66 100644
--- a/lib/tsan/go/build.bat
+++ b/lib/tsan/go/build.bat
@@ -1,4 +1,4 @@
-type tsan_go.cc ..\rtl\tsan_clock.cc ..\rtl\tsan_flags.cc ..\rtl\tsan_md5.cc ..\rtl\tsan_mutex.cc ..\rtl\tsan_report.cc ..\rtl\tsan_rtl.cc ..\rtl\tsan_rtl_mutex.cc ..\rtl\tsan_rtl_report.cc ..\rtl\tsan_rtl_thread.cc ..\rtl\tsan_stat.cc ..\rtl\tsan_suppressions.cc ..\rtl\tsan_sync.cc ..\..\sanitizer_common\sanitizer_allocator.cc ..\..\sanitizer_common\sanitizer_common.cc ..\..\sanitizer_common\sanitizer_flags.cc ..\..\sanitizer_common\sanitizer_stacktrace.cc ..\..\sanitizer_common\sanitizer_libc.cc ..\..\sanitizer_common\sanitizer_printf.cc ..\..\sanitizer_common\sanitizer_suppressions.cc ..\..\sanitizer_common\sanitizer_thread_registry.cc ..\rtl\tsan_platform_windows.cc ..\..\sanitizer_common\sanitizer_win.cc ..\..\sanitizer_common\sanitizer_deadlock_detector1.cc > gotsan.cc
+type tsan_go.cc ..\rtl\tsan_clock.cc ..\rtl\tsan_flags.cc ..\rtl\tsan_md5.cc ..\rtl\tsan_mutex.cc ..\rtl\tsan_report.cc ..\rtl\tsan_rtl.cc ..\rtl\tsan_rtl_mutex.cc ..\rtl\tsan_rtl_report.cc ..\rtl\tsan_rtl_thread.cc ..\rtl\tsan_stat.cc ..\rtl\tsan_suppressions.cc ..\rtl\tsan_sync.cc ..\rtl\tsan_stack_trace.cc ..\..\sanitizer_common\sanitizer_allocator.cc ..\..\sanitizer_common\sanitizer_common.cc ..\..\sanitizer_common\sanitizer_flags.cc ..\..\sanitizer_common\sanitizer_stacktrace.cc ..\..\sanitizer_common\sanitizer_libc.cc ..\..\sanitizer_common\sanitizer_printf.cc ..\..\sanitizer_common\sanitizer_suppressions.cc ..\..\sanitizer_common\sanitizer_thread_registry.cc ..\rtl\tsan_platform_windows.cc ..\..\sanitizer_common\sanitizer_win.cc ..\..\sanitizer_common\sanitizer_deadlock_detector1.cc ..\..\sanitizer_common\sanitizer_stackdepot.cc ..\..\sanitizer_common\sanitizer_persistent_allocator.cc > gotsan.cc
 
 gcc -c -o race_windows_amd64.syso gotsan.cc -I..\rtl -I..\.. -I..\..\sanitizer_common -I..\..\..\include -m64 -Wall -fno-exceptions -fno-rtti -DTSAN_GO -DSANITIZER_GO -DTSAN_SHADOW_COUNT=4 -Wno-error=attributes -Wno-attributes -Wno-format -DTSAN_DEBUG=0 -O3 -fomit-frame-pointer
 
diff --git a/lib/tsan/go/buildgo.sh b/lib/tsan/go/buildgo.sh
index f9db35f..f8eb081 100755
--- a/lib/tsan/go/buildgo.sh
+++ b/lib/tsan/go/buildgo.sh
@@ -1,4 +1,3 @@
-#!/bin/bash
 set -e
 
 SRCS="
@@ -12,6 +11,7 @@
 	../rtl/tsan_rtl_mutex.cc
 	../rtl/tsan_rtl_report.cc
 	../rtl/tsan_rtl_thread.cc
+	../rtl/tsan_stack_trace.cc
 	../rtl/tsan_stat.cc
 	../rtl/tsan_suppressions.cc
 	../rtl/tsan_sync.cc
@@ -29,7 +29,7 @@
 
 if [ "`uname -a | grep Linux`" != "" ]; then
 	SUFFIX="linux_amd64"
-	OSCFLAGS="-fPIC -ffreestanding -Wno-maybe-uninitialized -Werror"
+	OSCFLAGS="-fPIC -ffreestanding -Wno-maybe-uninitialized -Wno-unused-const-variable -Werror -Wno-unknown-warning-option"
 	OSLDFLAGS="-lpthread -fPIC -fpie"
 	SRCS+="
 		../rtl/tsan_platform_linux.cc
@@ -39,9 +39,21 @@
 		../../sanitizer_common/sanitizer_linux.cc
 		../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
 	"
+elif [ "`uname -a | grep FreeBSD`" != "" ]; then
+        SUFFIX="freebsd_amd64"
+        OSCFLAGS="-fno-strict-aliasing -fPIC -Werror"
+        OSLDFLAGS="-lpthread -fPIC -fpie"
+        SRCS+="
+                ../rtl/tsan_platform_linux.cc
+                ../../sanitizer_common/sanitizer_posix.cc
+                ../../sanitizer_common/sanitizer_posix_libcdep.cc
+                ../../sanitizer_common/sanitizer_procmaps_linux.cc
+                ../../sanitizer_common/sanitizer_linux.cc
+                ../../sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
+        "
 elif [ "`uname -a | grep Darwin`" != "" ]; then
 	SUFFIX="darwin_amd64"
-	OSCFLAGS="-fPIC"
+	OSCFLAGS="-fPIC -Wno-unused-const-variable -Wno-unknown-warning-option"
 	OSLDFLAGS="-lpthread -fPIC -fpie"
 	SRCS+="
 		../rtl/tsan_platform_mac.cc
@@ -52,7 +64,7 @@
 	"
 elif [ "`uname -a | grep MINGW`" != "" ]; then
 	SUFFIX="windows_amd64"
-	OSCFLAGS="-Wno-error=attributes -Wno-attributes"
+	OSCFLAGS="-Wno-error=attributes -Wno-attributes -Wno-unused-const-variable -Wno-unknown-warning-option"
 	OSLDFLAGS=""
 	SRCS+="
 		../rtl/tsan_platform_windows.cc
@@ -72,13 +84,15 @@
 
 FLAGS=" -I../rtl -I../.. -I../../sanitizer_common -I../../../include -m64 -Wall -fno-exceptions -fno-rtti -DTSAN_GO -DSANITIZER_GO -DTSAN_SHADOW_COUNT=4 -DSANITIZER_DEADLOCK_DETECTOR_VERSION=2 $OSCFLAGS"
 if [ "$DEBUG" == "" ]; then
-	FLAGS+=" -DTSAN_DEBUG=0 -O3 -fomit-frame-pointer"
+	FLAGS+=" -DTSAN_DEBUG=0 -O3 -msse3 -fomit-frame-pointer"
 else
 	FLAGS+=" -DTSAN_DEBUG=1 -g"
 fi
 
-echo gcc gotsan.cc -S -o tmp.s $FLAGS $CFLAGS
-gcc gotsan.cc -c -o race_$SUFFIX.syso $FLAGS $CFLAGS
+CC=${CC:-gcc}
 
-gcc test.c race_$SUFFIX.syso -m64 -o test $OSLDFLAGS
+echo $CC gotsan.cc -c -o race_$SUFFIX.syso $FLAGS $CFLAGS
+$CC gotsan.cc -c -o race_$SUFFIX.syso $FLAGS $CFLAGS
+
+$CC test.c race_$SUFFIX.syso -m64 -o test $OSLDFLAGS
 GORACE="exitcode=0 atexit_sleep_ms=0" ./test
diff --git a/lib/tsan/go/tsan_go.cc b/lib/tsan/go/tsan_go.cc
index e7761fe..5e22092 100644
--- a/lib/tsan/go/tsan_go.cc
+++ b/lib/tsan/go/tsan_go.cc
@@ -191,17 +191,17 @@
   AcquireGlobal(thr, 0);
 }
 
-void __tsan_mutex_before_lock(ThreadState *thr, uptr addr, bool write) {
+void __tsan_mutex_before_lock(ThreadState *thr, uptr addr, uptr write) {
 }
 
-void __tsan_mutex_after_lock(ThreadState *thr, uptr addr, bool write) {
+void __tsan_mutex_after_lock(ThreadState *thr, uptr addr, uptr write) {
   if (write)
     MutexLock(thr, 0, addr);
   else
     MutexReadLock(thr, 0, addr);
 }
 
-void __tsan_mutex_before_unlock(ThreadState *thr, uptr addr, bool write) {
+void __tsan_mutex_before_unlock(ThreadState *thr, uptr addr, uptr write) {
   if (write)
     MutexUnlock(thr, 0, addr);
   else
diff --git a/lib/tsan/rtl/Makefile.old b/lib/tsan/rtl/Makefile.old
index 2a71869..79c761c 100644
--- a/lib/tsan/rtl/Makefile.old
+++ b/lib/tsan/rtl/Makefile.old
@@ -1,4 +1,4 @@
-CXXFLAGS = -std=c++11 -fPIE -g -Wall -Werror -fno-builtin -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
+CXXFLAGS = -std=c++11 -fPIE -g -Wall -Werror -fno-builtin -msse3 -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
 CLANG=clang
 ifeq ($(DEBUG), 0)
   CXXFLAGS += -O3
diff --git a/lib/tsan/rtl/tsan_clock.cc b/lib/tsan/rtl/tsan_clock.cc
index d40f40f..e140a3c 100644
--- a/lib/tsan/rtl/tsan_clock.cc
+++ b/lib/tsan/rtl/tsan_clock.cc
@@ -330,6 +330,11 @@
 
 void SyncClock::Reset() {
   clk_.Reset();
+  Zero();
+}
+
+void SyncClock::Zero() {
+  clk_.Resize(0);
   release_store_tid_ = kInvalidTid;
   release_store_reused_ = 0;
   for (uptr i = 0; i < kDirtyTids; i++)
diff --git a/lib/tsan/rtl/tsan_clock.h b/lib/tsan/rtl/tsan_clock.h
index 931fde8..f7ab69a 100644
--- a/lib/tsan/rtl/tsan_clock.h
+++ b/lib/tsan/rtl/tsan_clock.h
@@ -38,6 +38,7 @@
   }
 
   void Reset();
+  void Zero();
 
   void DebugDump(int(*printf)(const char *s, ...));
 
diff --git a/lib/tsan/rtl/tsan_defs.h b/lib/tsan/rtl/tsan_defs.h
index 0ee19e9..969d09f 100644
--- a/lib/tsan/rtl/tsan_defs.h
+++ b/lib/tsan/rtl/tsan_defs.h
@@ -54,6 +54,7 @@
 # endif
 #else
 // Count of shadow values in a shadow cell.
+#define TSAN_SHADOW_COUNT 4
 const uptr kShadowCnt = 4;
 #endif
 
@@ -66,6 +67,13 @@
 // Shadow memory is kShadowMultiplier times larger than user memory.
 const uptr kShadowMultiplier = kShadowSize * kShadowCnt / kShadowCell;
 
+// This many user bytes are mapped onto a single meta shadow cell.
+// Must be less than or equal to the minimal memory allocator alignment.
+const uptr kMetaShadowCell = 8;
+
+// Size of a single meta shadow value (u32).
+const uptr kMetaShadowSize = 4;
+
 #if defined(TSAN_NO_HISTORY) && TSAN_NO_HISTORY
 const bool kCollectHistory = false;
 #else
@@ -167,7 +175,15 @@
 class ReportDesc;
 class RegionAlloc;
 class StackTrace;
-struct MBlock;
+
+// Descriptor of user's memory block.
+struct MBlock {
+  u64  siz;  // Block size; this is what user_alloc_usable_size() returns.
+  u32  stk;  // NOTE(review): presumably the allocation stack id -- confirm.
+  u16  tid;  // NOTE(review): presumably the allocating thread id -- confirm.
+};
+
+COMPILER_CHECK(sizeof(MBlock) == 16);
 
 }  // namespace __tsan
 
diff --git a/lib/tsan/rtl/tsan_dense_alloc.h b/lib/tsan/rtl/tsan_dense_alloc.h
new file mode 100644
index 0000000..2c2e75e
--- /dev/null
+++ b/lib/tsan/rtl/tsan_dense_alloc.h
@@ -0,0 +1,136 @@
+//===-- tsan_dense_alloc.h --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// A DenseSlabAlloc is a freelist-based allocator of fixed-size objects.
+// DenseSlabAllocCache is a thread-local cache for DenseSlabAlloc.
+// The only difference from traditional slab allocators is that DenseSlabAlloc
+// allocates/frees indices of objects and provides functionality to map
+// the index onto the real pointer. The index is u32, that is, 2 times smaller
+// than uptr (hence the Dense prefix).
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_DENSE_ALLOC_H
+#define TSAN_DENSE_ALLOC_H
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "tsan_defs.h"
+#include "tsan_mutex.h"
+
+namespace __tsan {
+
+class DenseSlabAllocCache {
+  static const uptr kSize = 128;
+  typedef u32 IndexT;
+  uptr pos;
+  IndexT cache[kSize];
+  template<typename T, uptr kL1Size, uptr kL2Size> friend class DenseSlabAlloc;
+};
+
+template<typename T, uptr kL1Size, uptr kL2Size>
+class DenseSlabAlloc {
+ public:
+  typedef DenseSlabAllocCache Cache;
+  typedef typename Cache::IndexT IndexT;
+
+  DenseSlabAlloc() {
+    // Check that kL1Size and kL2Size are sane.
+    CHECK_EQ(kL1Size & (kL1Size - 1), 0);
+    CHECK_EQ(kL2Size & (kL2Size - 1), 0);
+    CHECK_GE(1ull << (sizeof(IndexT) * 8), kL1Size * kL2Size);
+    // Check that it makes sense to use the dense alloc.
+    CHECK_GE(sizeof(T), sizeof(IndexT));
+    internal_memset(map_, 0, sizeof(map_));
+    freelist_ = 0;
+    fillpos_ = 0;
+  }
+
+  ~DenseSlabAlloc() {
+    for (uptr i = 0; i < kL1Size; i++) {
+      if (map_[i] != 0)
+        UnmapOrDie(map_[i], kL2Size * sizeof(T));
+    }
+  }
+
+  IndexT Alloc(Cache *c) {
+    if (c->pos == 0)
+      Refill(c);
+    return c->cache[--c->pos];
+  }
+
+  void Free(Cache *c, IndexT idx) {
+    if (c->pos == Cache::kSize)
+      Drain(c);
+    c->cache[c->pos++] = idx;
+  }
+
+  T *Map(IndexT idx) {  // Index -> object pointer via the two-level map_.
+    DCHECK_NE(idx, 0);  // 0 is reserved as the invalid index.
+    DCHECK_LT(idx, kL1Size * kL2Size);  // Equal would select map_[kL1Size].
+    return &map_[idx / kL2Size][idx % kL2Size];
+  }
+
+  void FlushCache(Cache *c) {
+    SpinMutexLock lock(&mtx_);
+    while (c->pos) {
+      IndexT idx = c->cache[--c->pos];
+      *(IndexT*)Map(idx) = freelist_;
+      freelist_ = idx;
+    }
+  }
+
+  void InitCache(Cache *c) {
+    c->pos = 0;
+    internal_memset(c->cache, 0, sizeof(c->cache));
+  }
+
+ private:
+  T *map_[kL1Size];
+  SpinMutex mtx_;
+  IndexT freelist_;
+  uptr fillpos_;
+
+  void Refill(Cache *c) {
+    SpinMutexLock lock(&mtx_);
+    if (freelist_ == 0) {
+      if (fillpos_ == kL1Size) {
+        Printf("ThreadSanitizer: DenseSlabAllocator overflow. Dying.\n");
+        Die();
+      }
+      T *batch = (T*)MmapOrDie(kL2Size * sizeof(T), "DenseSlabAllocator");
+      // Reserve 0 as invalid index.
+      IndexT start = fillpos_ == 0 ? 1 : 0;
+      for (IndexT i = start; i < kL2Size; i++) {
+        new(batch + i) T();
+        *(IndexT*)(batch + i) = i + 1 + fillpos_ * kL2Size;
+      }
+      *(IndexT*)(batch + kL2Size - 1) = 0;
+      freelist_ = fillpos_ * kL2Size + start;
+      map_[fillpos_++] = batch;
+    }
+    for (uptr i = 0; i < Cache::kSize / 2 && freelist_ != 0; i++) {
+      IndexT idx = freelist_;
+      c->cache[c->pos++] = idx;
+      freelist_ = *(IndexT*)Map(idx);
+    }
+  }
+
+  void Drain(Cache *c) {
+    SpinMutexLock lock(&mtx_);
+    for (uptr i = 0; i < Cache::kSize / 2; i++) {
+      IndexT idx = c->cache[--c->pos];
+      *(IndexT*)Map(idx) = freelist_;
+      freelist_ = idx;
+    }
+  }
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_DENSE_ALLOC_H
diff --git a/lib/tsan/rtl/tsan_fd.cc b/lib/tsan/rtl/tsan_fd.cc
index 6c7fc17..68242e0 100644
--- a/lib/tsan/rtl/tsan_fd.cc
+++ b/lib/tsan/rtl/tsan_fd.cc
@@ -47,8 +47,8 @@
   return fd < 0 || fd >= kTableSize;
 }
 
-static FdSync *allocsync() {
-  FdSync *s = (FdSync*)internal_alloc(MBlockFD, sizeof(FdSync));
+static FdSync *allocsync(ThreadState *thr, uptr pc) {
+  FdSync *s = (FdSync*)user_alloc(thr, pc, sizeof(FdSync));
   atomic_store(&s->rc, 1, memory_order_relaxed);
   return s;
 }
@@ -65,10 +65,7 @@
       CHECK_NE(s, &fdctx.globsync);
       CHECK_NE(s, &fdctx.filesync);
       CHECK_NE(s, &fdctx.socksync);
-      SyncVar *v = ctx->synctab.GetAndRemove(thr, pc, (uptr)s);
-      if (v)
-        DestroyAndFree(v);
-      internal_free(s);
+      user_free(thr, pc, s);
     }
   }
 }
@@ -219,7 +216,7 @@
 
 void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd) {
   DPrintf("#%d: FdCreatePipe(%d, %d)\n", thr->tid, rfd, wfd);
-  FdSync *s = allocsync();
+  FdSync *s = allocsync(thr, pc);
   init(thr, pc, rfd, ref(s));
   init(thr, pc, wfd, ref(s));
   unref(thr, pc, s);
@@ -229,7 +226,7 @@
   DPrintf("#%d: FdEventCreate(%d)\n", thr->tid, fd);
   if (bogusfd(fd))
     return;
-  init(thr, pc, fd, allocsync());
+  init(thr, pc, fd, allocsync(thr, pc));
 }
 
 void FdSignalCreate(ThreadState *thr, uptr pc, int fd) {
@@ -250,7 +247,7 @@
   DPrintf("#%d: FdPollCreate(%d)\n", thr->tid, fd);
   if (bogusfd(fd))
     return;
-  init(thr, pc, fd, allocsync());
+  init(thr, pc, fd, allocsync(thr, pc));
 }
 
 void FdSocketCreate(ThreadState *thr, uptr pc, int fd) {
diff --git a/lib/tsan/rtl/tsan_flags.cc b/lib/tsan/rtl/tsan_flags.cc
index 1431200..123df49 100644
--- a/lib/tsan/rtl/tsan_flags.cc
+++ b/lib/tsan/rtl/tsan_flags.cc
@@ -102,6 +102,7 @@
   SetCommonFlagsDefaults(f);
   // Override some common flags defaults.
   f->allow_addr2line = true;
+  f->detect_deadlocks = true;
 
   // Let a frontend override.
   ParseFlags(f, __tsan_default_options());
diff --git a/lib/tsan/rtl/tsan_interceptors.cc b/lib/tsan/rtl/tsan_interceptors.cc
index 82c4da8..100834e 100644
--- a/lib/tsan/rtl/tsan_interceptors.cc
+++ b/lib/tsan/rtl/tsan_interceptors.cc
@@ -47,7 +47,7 @@
 extern "C" int pthread_attr_setstacksize(void *attr, uptr stacksize);
 extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
 extern "C" int pthread_setspecific(unsigned key, const void *v);
-extern "C" int pthread_mutexattr_gettype(void *a, int *type);
+DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
 extern "C" int pthread_yield();
 extern "C" int pthread_sigmask(int how, const __sanitizer_sigset_t *set,
                                __sanitizer_sigset_t *oldset);
@@ -191,6 +191,7 @@
   if (!thr_->ignore_interceptors) {
     ProcessPendingSignals(thr_);
     FuncExit(thr_);
+    CheckNoLocks(thr_);
   }
 }
 
@@ -519,7 +520,7 @@
 
 TSAN_INTERCEPTOR(uptr, malloc_usable_size, void *p) {
   SCOPED_INTERCEPTOR_RAW(malloc_usable_size, p);
-  return user_alloc_usable_size(thr, pc, p);
+  return user_alloc_usable_size(p);
 }
 
 #define OPERATOR_NEW_BODY(mangled_name) \
@@ -737,6 +738,11 @@
   return user_alloc(thr, pc, sz, align);
 }
 
+TSAN_INTERCEPTOR(void*, aligned_alloc, uptr align, uptr sz) {
+  SCOPED_INTERCEPTOR_RAW(memalign, align, sz);
+  return user_alloc(thr, pc, sz, align);
+}
+
 TSAN_INTERCEPTOR(void*, valloc, uptr sz) {
   SCOPED_INTERCEPTOR_RAW(valloc, sz);
   return user_alloc(thr, pc, sz, GetPageSizeCached());
@@ -1028,7 +1034,7 @@
     bool recursive = false;
     if (a) {
       int type = 0;
-      if (pthread_mutexattr_gettype(a, &type) == 0)
+      if (REAL(pthread_mutexattr_gettype)(a, &type) == 0)
         recursive = (type == PTHREAD_MUTEX_RECURSIVE
             || type == PTHREAD_MUTEX_RECURSIVE_NP);
     }
@@ -1139,7 +1145,7 @@
   SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_tryrdlock, m);
   int res = REAL(pthread_rwlock_tryrdlock)(m);
   if (res == 0) {
-    MutexLock(thr, pc, (uptr)m, /*rec=*/1, /*try_lock=*/true);
+    MutexReadLock(thr, pc, (uptr)m, /*try_lock=*/true);
   }
   return res;
 }
@@ -1704,8 +1710,8 @@
     ThreadRegistryLock l(ctx->thread_registry);
     ScopedReport rep(ReportTypeErrnoInSignal);
     if (!IsFiredSuppression(ctx, rep, stack)) {
-      rep.AddStack(&stack);
-      OutputReport(ctx, rep, rep.GetReport()->stacks[0]);
+      rep.AddStack(&stack, true);
+      OutputReport(thr, rep);
     }
   }
   errno = saved_errno;
diff --git a/lib/tsan/rtl/tsan_interface_ann.cc b/lib/tsan/rtl/tsan_interface_ann.cc
index 5632323..a1725cb 100644
--- a/lib/tsan/rtl/tsan_interface_ann.cc
+++ b/lib/tsan/rtl/tsan_interface_ann.cc
@@ -40,6 +40,7 @@
 
   ~ScopedAnnotation() {
     FuncExit(thr_);
+    CheckNoLocks(thr_);
   }
  private:
   ThreadState *const thr_;
diff --git a/lib/tsan/rtl/tsan_interface_atomic.cc b/lib/tsan/rtl/tsan_interface_atomic.cc
index 2de0c4f..7fbc9c6 100644
--- a/lib/tsan/rtl/tsan_interface_atomic.cc
+++ b/lib/tsan/rtl/tsan_interface_atomic.cc
@@ -291,7 +291,7 @@
     MemoryReadAtomic(thr, pc, (uptr)a, SizeLog<T>());
     return NoTsanAtomicLoad(a, mo);
   }
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, false);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, false);
   AcquireImpl(thr, pc, &s->clock);
   T v = NoTsanAtomicLoad(a, mo);
   s->mtx.ReadUnlock();
@@ -325,7 +325,7 @@
     return;
   }
   __sync_synchronize();
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
   thr->fast_state.IncrementEpoch();
   // Can't increment epoch w/o writing to the trace as well.
   TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
@@ -339,7 +339,7 @@
   MemoryWriteAtomic(thr, pc, (uptr)a, SizeLog<T>());
   SyncVar *s = 0;
   if (mo != mo_relaxed) {
-    s = ctx->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, true);
+    s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, true);
     thr->fast_state.IncrementEpoch();
     // Can't increment epoch w/o writing to the trace as well.
     TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
@@ -463,7 +463,7 @@
   SyncVar *s = 0;
   bool write_lock = mo != mo_acquire && mo != mo_consume;
   if (mo != mo_relaxed) {
-    s = ctx->synctab.GetOrCreateAndLock(thr, pc, (uptr)a, write_lock);
+    s = ctx->metamap.GetOrCreateAndLock(thr, pc, (uptr)a, write_lock);
     thr->fast_state.IncrementEpoch();
     // Can't increment epoch w/o writing to the trace as well.
     TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
diff --git a/lib/tsan/rtl/tsan_interface_java.cc b/lib/tsan/rtl/tsan_interface_java.cc
index d0c003e..5dfb476 100644
--- a/lib/tsan/rtl/tsan_interface_java.cc
+++ b/lib/tsan/rtl/tsan_interface_java.cc
@@ -22,54 +22,17 @@
 
 using namespace __tsan;  // NOLINT
 
+const jptr kHeapAlignment = 8;
+
 namespace __tsan {
 
-const uptr kHeapShadow = 0x300000000000ull;
-const uptr kHeapAlignment = 8;
-
-struct BlockDesc {
-  bool begin;
-  Mutex mtx;
-  SyncVar *head;
-
-  BlockDesc()
-      : mtx(MutexTypeJavaMBlock, StatMtxJavaMBlock)
-      , head() {
-    CHECK_EQ(begin, false);
-    begin = true;
-  }
-
-  ~BlockDesc() {
-    CHECK_EQ(begin, true);
-    begin = false;
-    ThreadState *thr = cur_thread();
-    SyncVar *s = head;
-    while (s) {
-      SyncVar *s1 = s->next;
-      StatInc(thr, StatSyncDestroyed);
-      s->mtx.Lock();
-      s->mtx.Unlock();
-      thr->mset.Remove(s->GetId());
-      DestroyAndFree(s);
-      s = s1;
-    }
-  }
-};
-
 struct JavaContext {
   const uptr heap_begin;
   const uptr heap_size;
-  BlockDesc *heap_shadow;
 
   JavaContext(jptr heap_begin, jptr heap_size)
       : heap_begin(heap_begin)
       , heap_size(heap_size) {
-    uptr size = heap_size / kHeapAlignment * sizeof(BlockDesc);
-    heap_shadow = (BlockDesc*)MmapFixedNoReserve(kHeapShadow, size);
-    if ((uptr)heap_shadow != kHeapShadow) {
-      Printf("ThreadSanitizer: failed to mmap Java heap shadow\n");
-      Die();
-    }
   }
 };
 
@@ -93,63 +56,6 @@
 static u64 jctx_buf[sizeof(JavaContext) / sizeof(u64) + 1];
 static JavaContext *jctx;
 
-static BlockDesc *getblock(uptr addr) {
-  uptr i = (addr - jctx->heap_begin) / kHeapAlignment;
-  return &jctx->heap_shadow[i];
-}
-
-static uptr USED getmem(BlockDesc *b) {
-  uptr i = b - jctx->heap_shadow;
-  uptr p = jctx->heap_begin + i * kHeapAlignment;
-  CHECK_GE(p, jctx->heap_begin);
-  CHECK_LT(p, jctx->heap_begin + jctx->heap_size);
-  return p;
-}
-
-static BlockDesc *getblockbegin(uptr addr) {
-  for (BlockDesc *b = getblock(addr);; b--) {
-    CHECK_GE(b, jctx->heap_shadow);
-    if (b->begin)
-      return b;
-  }
-  return 0;
-}
-
-SyncVar* GetJavaSync(ThreadState *thr, uptr pc, uptr addr,
-                     bool write_lock, bool create) {
-  if (jctx == 0 || addr < jctx->heap_begin
-      || addr >= jctx->heap_begin + jctx->heap_size)
-    return 0;
-  BlockDesc *b = getblockbegin(addr);
-  DPrintf("#%d: GetJavaSync %p->%p\n", thr->tid, addr, b);
-  Lock l(&b->mtx);
-  SyncVar *s = b->head;
-  for (; s; s = s->next) {
-    if (s->addr == addr) {
-      DPrintf("#%d: found existing sync for %p\n", thr->tid, addr);
-      break;
-    }
-  }
-  if (s == 0 && create) {
-    DPrintf("#%d: creating new sync for %p\n", thr->tid, addr);
-    s = ctx->synctab.Create(thr, pc, addr);
-    s->next = b->head;
-    b->head = s;
-  }
-  if (s) {
-    if (write_lock)
-      s->mtx.Lock();
-    else
-      s->mtx.ReadLock();
-  }
-  return s;
-}
-
-SyncVar* GetAndRemoveJavaSync(ThreadState *thr, uptr pc, uptr addr) {
-  // We do not destroy Java mutexes other than in __tsan_java_free().
-  return 0;
-}
-
 }  // namespace __tsan
 
 #define SCOPED_JAVA_FUNC(func) \
@@ -192,8 +98,7 @@
   CHECK_GE(ptr, jctx->heap_begin);
   CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
 
-  BlockDesc *b = getblock(ptr);
-  new(b) BlockDesc();
+  OnUserAlloc(thr, pc, ptr, size, false);
 }
 
 void __tsan_java_free(jptr ptr, jptr size) {
@@ -206,12 +111,7 @@
   CHECK_GE(ptr, jctx->heap_begin);
   CHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
 
-  BlockDesc *beg = getblock(ptr);
-  BlockDesc *end = getblock(ptr + size);
-  for (BlockDesc *b = beg; b != end; b++) {
-    if (b->begin)
-      b->~BlockDesc();
-  }
+  ctx->metamap.FreeRange(thr, pc, ptr, size);
 }
 
 void __tsan_java_move(jptr src, jptr dst, jptr size) {
@@ -226,40 +126,34 @@
   CHECK_LE(src + size, jctx->heap_begin + jctx->heap_size);
   CHECK_GE(dst, jctx->heap_begin);
   CHECK_LE(dst + size, jctx->heap_begin + jctx->heap_size);
-  CHECK(dst >= src + size || src >= dst + size);
+  CHECK_NE(dst, src);
+  CHECK_NE(size, 0);
 
   // Assuming it's not running concurrently with threads that do
   // memory accesses and mutex operations (stop-the-world phase).
-  {  // NOLINT
-    BlockDesc *s = getblock(src);
-    BlockDesc *d = getblock(dst);
-    BlockDesc *send = getblock(src + size);
-    for (; s != send; s++, d++) {
-      CHECK_EQ(d->begin, false);
-      if (s->begin) {
-        DPrintf("#%d: moving block %p->%p\n", thr->tid, getmem(s), getmem(d));
-        new(d) BlockDesc;
-        d->head = s->head;
-        for (SyncVar *sync = d->head; sync; sync = sync->next) {
-          uptr newaddr = sync->addr - src + dst;
-          DPrintf("#%d: moving sync %p->%p\n", thr->tid, sync->addr, newaddr);
-          sync->addr = newaddr;
-        }
-        s->head = 0;
-        s->~BlockDesc();
-      }
-    }
-  }
+  ctx->metamap.MoveMemory(src, dst, size);
 
-  {  // NOLINT
-    u64 *s = (u64*)MemToShadow(src);
-    u64 *d = (u64*)MemToShadow(dst);
-    u64 *send = (u64*)MemToShadow(src + size);
-    for (; s != send; s++, d++) {
-      *d = *s;
-      *s = 0;
-    }
+  // Move shadow.
+  u64 *s = (u64*)MemToShadow(src);
+  u64 *d = (u64*)MemToShadow(dst);
+  u64 *send = (u64*)MemToShadow(src + size);
+  uptr inc = 1;
+  if (dst > src) {
+    s = (u64*)MemToShadow(src + size) - 1;
+    d = (u64*)MemToShadow(dst + size) - 1;
+    send = (u64*)MemToShadow(src) - 1;
+    inc = -1;
   }
+  for (; s != send; s += inc, d += inc) {
+    *d = *s;
+    *s = 0;
+  }
+}
+
+void __tsan_java_finalize() {  // Contract is described in tsan_interface_java.h.
+  SCOPED_JAVA_FUNC(__tsan_java_finalize);
+  DPrintf("#%d: java_finalize()\n", thr->tid);  // Trace names this entry point.
+  AcquireGlobal(thr, 0);
 }
 
 void __tsan_java_mutex_lock(jptr addr) {
diff --git a/lib/tsan/rtl/tsan_interface_java.h b/lib/tsan/rtl/tsan_interface_java.h
index 9ac78e0..1f793df 100644
--- a/lib/tsan/rtl/tsan_interface_java.h
+++ b/lib/tsan/rtl/tsan_interface_java.h
@@ -50,8 +50,13 @@
 void __tsan_java_free(jptr ptr, jptr size) INTERFACE_ATTRIBUTE;
 // Callback for memory move by GC.
 // Can be aggregated for several objects (preferably).
-// The ranges must not overlap.
+// The ranges can overlap.
 void __tsan_java_move(jptr src, jptr dst, jptr size) INTERFACE_ATTRIBUTE;
+// This function must be called on the finalizer thread
+// before executing a batch of finalizers.
+// It ensures necessary synchronization between
+// java object creation and finalization.
+void __tsan_java_finalize() INTERFACE_ATTRIBUTE;
 
 // Mutex lock.
 // Addr is any unique address associated with the mutex.
diff --git a/lib/tsan/rtl/tsan_mman.cc b/lib/tsan/rtl/tsan_mman.cc
index 8941eb1..8542a8f 100644
--- a/lib/tsan/rtl/tsan_mman.cc
+++ b/lib/tsan/rtl/tsan_mman.cc
@@ -10,6 +10,7 @@
 // This file is a part of ThreadSanitizer (TSan), a race detector.
 //
 //===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_allocator_interface.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_placement_new.h"
 #include "tsan_mman.h"
@@ -22,39 +23,20 @@
   (void)ptr;
   (void)size;
 }
+extern "C" void WEAK __sanitizer_malloc_hook(void *ptr, uptr size) {
+  (void)ptr;
+  (void)size;
+}
 
 extern "C" void WEAK __tsan_free_hook(void *ptr) {
   (void)ptr;
 }
+extern "C" void WEAK __sanitizer_free_hook(void *ptr) {
+  (void)ptr;
+}
 
 namespace __tsan {
 
-COMPILER_CHECK(sizeof(MBlock) == 16);
-
-void MBlock::Lock() {
-  atomic_uintptr_t *a = reinterpret_cast<atomic_uintptr_t*>(this);
-  uptr v = atomic_load(a, memory_order_relaxed);
-  for (int iter = 0;; iter++) {
-    if (v & 1) {
-      if (iter < 10)
-        proc_yield(20);
-      else
-        internal_sched_yield();
-      v = atomic_load(a, memory_order_relaxed);
-      continue;
-    }
-    if (atomic_compare_exchange_weak(a, &v, v | 1, memory_order_acquire))
-      break;
-  }
-}
-
-void MBlock::Unlock() {
-  atomic_uintptr_t *a = reinterpret_cast<atomic_uintptr_t*>(this);
-  uptr v = atomic_load(a, memory_order_relaxed);
-  DCHECK(v & 1);
-  atomic_store(a, v & ~1, memory_order_relaxed);
-}
-
 struct MapUnmapCallback {
   void OnMap(uptr p, uptr size) const { }
   void OnUnmap(uptr p, uptr size) const {
@@ -95,8 +77,8 @@
   ThreadRegistryLock l(ctx->thread_registry);
   ScopedReport rep(ReportTypeSignalUnsafe);
   if (!IsFiredSuppression(ctx, rep, stack)) {
-    rep.AddStack(&stack);
-    OutputReport(ctx, rep, rep.GetReport()->stacks[0]);
+    rep.AddStack(&stack, true);
+    OutputReport(thr, rep);
   }
 }
 
@@ -106,43 +88,36 @@
   void *p = allocator()->Allocate(&thr->alloc_cache, sz, align);
   if (p == 0)
     return 0;
-  MBlock *b = new(allocator()->GetMetaData(p)) MBlock;
-  b->Init(sz, thr->tid, CurrentStackId(thr, pc));
-  if (ctx && ctx->initialized) {
-    if (thr->ignore_reads_and_writes == 0)
-      MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
-    else
-      MemoryResetRange(thr, pc, (uptr)p, sz);
-  }
-  DPrintf("#%d: alloc(%zu) = %p\n", thr->tid, sz, p);
+  if (ctx && ctx->initialized)
+    OnUserAlloc(thr, pc, (uptr)p, sz, true);
   SignalUnsafeCall(thr, pc);
   return p;
 }
 
 void user_free(ThreadState *thr, uptr pc, void *p) {
-  CHECK_NE(p, (void*)0);
-  DPrintf("#%d: free(%p)\n", thr->tid, p);
-  MBlock *b = (MBlock*)allocator()->GetMetaData(p);
-  if (b->ListHead()) {
-    MBlock::ScopedLock l(b);
-    for (SyncVar *s = b->ListHead(); s;) {
-      SyncVar *res = s;
-      s = s->next;
-      StatInc(thr, StatSyncDestroyed);
-      res->mtx.Lock();
-      res->mtx.Unlock();
-      DestroyAndFree(res);
-    }
-    b->ListReset();
-  }
-  if (ctx && ctx->initialized) {
-    if (thr->ignore_reads_and_writes == 0)
-      MemoryRangeFreed(thr, pc, (uptr)p, b->Size());
-  }
+  if (ctx && ctx->initialized)
+    OnUserFree(thr, pc, (uptr)p, true);
   allocator()->Deallocate(&thr->alloc_cache, p);
   SignalUnsafeCall(thr, pc);
 }
 
+void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
+  DPrintf("#%d: alloc(%zu) = %p\n", thr->tid, sz, p);
+  ctx->metamap.AllocBlock(thr, pc, p, sz);
+  if (write && thr->ignore_reads_and_writes == 0)
+    MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
+  else
+    MemoryResetRange(thr, pc, (uptr)p, sz);
+}
+
+void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
+  CHECK_NE(p, (void*)0);
+  uptr sz = ctx->metamap.FreeBlock(thr, pc, p);
+  DPrintf("#%d: free(%p, %zu)\n", thr->tid, p, sz);
+  if (write && thr->ignore_reads_and_writes == 0)
+    MemoryRangeFreed(thr, pc, (uptr)p, sz);
+}
+
 void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz) {
   void *p2 = 0;
   // FIXME: Handle "shrinking" more efficiently,
@@ -152,9 +127,8 @@
     if (p2 == 0)
       return 0;
     if (p) {
-      MBlock *b = user_mblock(thr, p);
-      CHECK_NE(b, 0);
-      internal_memcpy(p2, p, min(b->Size(), sz));
+      uptr oldsz = user_alloc_usable_size(p);
+      internal_memcpy(p2, p, min(oldsz, sz));
     }
   }
   if (p)
@@ -162,20 +136,11 @@
   return p2;
 }
 
-uptr user_alloc_usable_size(ThreadState *thr, uptr pc, void *p) {
+uptr user_alloc_usable_size(const void *p) {
   if (p == 0)
     return 0;
-  MBlock *b = (MBlock*)allocator()->GetMetaData(p);
-  return b ? b->Size() : 0;
-}
-
-MBlock *user_mblock(ThreadState *thr, void *p) {
-  CHECK_NE(p, 0);
-  Allocator *a = allocator();
-  void *b = a->GetBlockBegin(p);
-  if (b == 0)
-    return 0;
-  return (MBlock*)a->GetMetaData(b);
+  MBlock *b = ctx->metamap.GetBlock((uptr)p);
+  return b ? b->siz : 0;
 }
 
 void invoke_malloc_hook(void *ptr, uptr size) {
@@ -183,6 +148,7 @@
   if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
     return;
   __tsan_malloc_hook(ptr, size);
+  __sanitizer_malloc_hook(ptr, size);
 }
 
 void invoke_free_hook(void *ptr) {
@@ -190,6 +156,7 @@
   if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
     return;
   __tsan_free_hook(ptr);
+  __sanitizer_free_hook(ptr);
 }
 
 void *internal_alloc(MBlockType typ, uptr sz) {
@@ -216,51 +183,63 @@
 using namespace __tsan;
 
 extern "C" {
+uptr __sanitizer_get_current_allocated_bytes() {
+  uptr stats[AllocatorStatCount];
+  allocator()->GetStats(stats);
+  return stats[AllocatorStatAllocated];
+}
 uptr __tsan_get_current_allocated_bytes() {
-  u64 stats[AllocatorStatCount];
-  allocator()->GetStats(stats);
-  u64 m = stats[AllocatorStatMalloced];
-  u64 f = stats[AllocatorStatFreed];
-  return m >= f ? m - f : 1;
+  return __sanitizer_get_current_allocated_bytes();
 }
 
+uptr __sanitizer_get_heap_size() {
+  uptr stats[AllocatorStatCount];
+  allocator()->GetStats(stats);
+  return stats[AllocatorStatMapped];
+}
 uptr __tsan_get_heap_size() {
-  u64 stats[AllocatorStatCount];
-  allocator()->GetStats(stats);
-  u64 m = stats[AllocatorStatMmapped];
-  u64 f = stats[AllocatorStatUnmapped];
-  return m >= f ? m - f : 1;
+  return __sanitizer_get_heap_size();
 }
 
+uptr __sanitizer_get_free_bytes() {
+  return 1;
+}
 uptr __tsan_get_free_bytes() {
-  return 1;
+  return __sanitizer_get_free_bytes();
 }
 
+uptr __sanitizer_get_unmapped_bytes() {
+  return 1;
+}
 uptr __tsan_get_unmapped_bytes() {
-  return 1;
+  return __sanitizer_get_unmapped_bytes();
 }
 
-uptr __tsan_get_estimated_allocated_size(uptr size) {
+uptr __sanitizer_get_estimated_allocated_size(uptr size) {
   return size;
 }
-
-bool __tsan_get_ownership(void *p) {
-  return allocator()->GetBlockBegin(p) != 0;
+uptr __tsan_get_estimated_allocated_size(uptr size) {
+  return __sanitizer_get_estimated_allocated_size(size);
 }
 
-uptr __tsan_get_allocated_size(void *p) {
-  if (p == 0)
-    return 0;
-  p = allocator()->GetBlockBegin(p);
-  if (p == 0)
-    return 0;
-  MBlock *b = (MBlock*)allocator()->GetMetaData(p);
-  return b->Size();
+int __sanitizer_get_ownership(const void *p) {
+  return allocator()->GetBlockBegin(p) != 0;
+}
+int __tsan_get_ownership(const void *p) {
+  return __sanitizer_get_ownership(p);
+}
+
+uptr __sanitizer_get_allocated_size(const void *p) {
+  return user_alloc_usable_size(p);
+}
+uptr __tsan_get_allocated_size(const void *p) {
+  return __sanitizer_get_allocated_size(p);
 }
 
 void __tsan_on_thread_idle() {
   ThreadState *thr = cur_thread();
   allocator()->SwallowCache(&thr->alloc_cache);
   internal_allocator()->SwallowCache(&thr->internal_alloc_cache);
+  ctx->metamap.OnThreadIdle(thr);
 }
 }  // extern "C"
diff --git a/lib/tsan/rtl/tsan_mman.h b/lib/tsan/rtl/tsan_mman.h
index 19d5554..4f87ad6 100644
--- a/lib/tsan/rtl/tsan_mman.h
+++ b/lib/tsan/rtl/tsan_mman.h
@@ -31,10 +31,7 @@
 void user_free(ThreadState *thr, uptr pc, void *p);
 void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz);
 void *user_alloc_aligned(ThreadState *thr, uptr pc, uptr sz, uptr align);
-uptr user_alloc_usable_size(ThreadState *thr, uptr pc, void *p);
-// Given the pointer p into a valid allocated block,
-// returns the descriptor of the block.
-MBlock *user_mblock(ThreadState *thr, void *p);
+uptr user_alloc_usable_size(const void *p);
 
 // Invoking malloc/free hooks that may be installed by the user.
 void invoke_malloc_hook(void *ptr, uptr size);
@@ -62,7 +59,6 @@
   MBlockSuppression,
   MBlockExpectRace,
   MBlockSignal,
-  MBlockFD,
   MBlockJmpBuf,
 
   // This must be the last.
diff --git a/lib/tsan/rtl/tsan_mutex.cc b/lib/tsan/rtl/tsan_mutex.cc
index 2c16208..55d6e18 100644
--- a/lib/tsan/rtl/tsan_mutex.cc
+++ b/lib/tsan/rtl/tsan_mutex.cc
@@ -31,13 +31,13 @@
   /*0  MutexTypeInvalid*/     {},
   /*1  MutexTypeTrace*/       {MutexTypeLeaf},
   /*2  MutexTypeThreads*/     {MutexTypeReport},
-  /*3  MutexTypeReport*/      {MutexTypeSyncTab, MutexTypeSyncVar,
+  /*3  MutexTypeReport*/      {MutexTypeSyncVar,
                                MutexTypeMBlock, MutexTypeJavaMBlock},
   /*4  MutexTypeSyncVar*/     {MutexTypeDDetector},
-  /*5  MutexTypeSyncTab*/     {MutexTypeSyncVar},
+  /*5  MutexTypeSyncTab*/     {},  // unused
   /*6  MutexTypeSlab*/        {MutexTypeLeaf},
   /*7  MutexTypeAnnotations*/ {},
-  /*8  MutexTypeAtExit*/      {MutexTypeSyncTab},
+  /*8  MutexTypeAtExit*/      {MutexTypeSyncVar},
   /*9  MutexTypeMBlock*/      {MutexTypeSyncVar},
   /*10 MutexTypeJavaMBlock*/  {MutexTypeSyncVar},
   /*11 MutexTypeDDetector*/   {},
@@ -161,8 +161,20 @@
   CHECK(locked_[t]);
   locked_[t] = 0;
 }
+
+void InternalDeadlockDetector::CheckNoLocks() {
+  for (int i = 0; i != MutexTypeCount; i++) {
+    CHECK_EQ(locked_[i], 0);
+  }
+}
 #endif
 
+void CheckNoLocks(ThreadState *thr) {
+#if TSAN_DEBUG && !TSAN_GO
+  thr->internal_deadlock_detector.CheckNoLocks();
+#endif
+}
+
 const uptr kUnlocked = 0;
 const uptr kWriteLock = 1;
 const uptr kReadLock = 2;
@@ -222,7 +234,7 @@
       cmp = kUnlocked;
       if (atomic_compare_exchange_weak(&state_, &cmp, kWriteLock,
                                        memory_order_acquire)) {
-#if TSAN_COLLECT_STATS
+#if TSAN_COLLECT_STATS && !TSAN_GO
         StatInc(cur_thread(), stat_type_, backoff.Contention());
 #endif
         return;
@@ -250,7 +262,7 @@
   for (Backoff backoff; backoff.Do();) {
     prev = atomic_load(&state_, memory_order_acquire);
     if ((prev & kWriteLock) == 0) {
-#if TSAN_COLLECT_STATS
+#if TSAN_COLLECT_STATS && !TSAN_GO
       StatInc(cur_thread(), stat_type_, backoff.Contention());
 #endif
       return;
diff --git a/lib/tsan/rtl/tsan_mutex.h b/lib/tsan/rtl/tsan_mutex.h
index 12580fa..7bb1c48 100644
--- a/lib/tsan/rtl/tsan_mutex.h
+++ b/lib/tsan/rtl/tsan_mutex.h
@@ -71,6 +71,7 @@
   InternalDeadlockDetector();
   void Lock(MutexType t);
   void Unlock(MutexType t);
+  void CheckNoLocks();
  private:
   u64 seq_;
   u64 locked_[MutexTypeCount];
@@ -78,6 +79,10 @@
 
 void InitializeMutex();
 
+// Checks that the current thread does not hold any runtime locks
+// (e.g. when returning from an interceptor).
+void CheckNoLocks(ThreadState *thr);
+
 }  // namespace __tsan
 
 #endif  // TSAN_MUTEX_H
diff --git a/lib/tsan/rtl/tsan_platform.h b/lib/tsan/rtl/tsan_platform.h
index 7abe5f0..7d8d977 100644
--- a/lib/tsan/rtl/tsan_platform.h
+++ b/lib/tsan/rtl/tsan_platform.h
@@ -16,7 +16,9 @@
 C++ linux memory layout:
 0000 0000 0000 - 03c0 0000 0000: protected
 03c0 0000 0000 - 1000 0000 0000: shadow
-1000 0000 0000 - 6000 0000 0000: protected
+1000 0000 0000 - 3000 0000 0000: protected
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: protected
 6000 0000 0000 - 6200 0000 0000: traces
 6200 0000 0000 - 7d00 0000 0000: -
 7d00 0000 0000 - 7e00 0000 0000: heap
@@ -27,7 +29,9 @@
 0400 0000 0000 - 1000 0000 0000: shadow
 1000 0000 0000 - 2900 0000 0000: protected
 2900 0000 0000 - 2c00 0000 0000: modules
-2c00 0000 0000 - 6000 0000 0000: -
+2c00 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: -
 6000 0000 0000 - 6200 0000 0000: traces
 6200 0000 0000 - 7d00 0000 0000: -
 7d00 0000 0000 - 7e00 0000 0000: heap
@@ -40,7 +44,9 @@
 00c0 0000 0000 - 00e0 0000 0000: heap
 00e0 0000 0000 - 1000 0000 0000: -
 1000 0000 0000 - 1380 0000 0000: shadow
-1460 0000 0000 - 6000 0000 0000: -
+1460 0000 0000 - 2000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: -
 6000 0000 0000 - 6200 0000 0000: traces
 6200 0000 0000 - 7fff ffff ffff: -
 
@@ -51,7 +57,8 @@
 00e0 0000 0000 - 0100 0000 0000: -
 0100 0000 0000 - 0560 0000 0000: shadow
 0560 0000 0000 - 0760 0000 0000: traces
-0760 0000 0000 - 07ff ffff ffff: -
+0760 0000 0000 - 07d0 0000 0000: metainfo (memory blocks and sync objects)
+07d0 0000 0000 - 07ff ffff ffff: -
 */
 
 #ifndef TSAN_PLATFORM_H
@@ -68,20 +75,28 @@
 static const uptr kLinuxAppMemEnd = 0x04dfffffffffULL;
 # if SANITIZER_WINDOWS
 static const uptr kLinuxShadowMsk = 0x010000000000ULL;
-# else
+static const uptr kMetaShadow     = 0x076000000000ULL;
+static const uptr kMetaSize       = 0x007000000000ULL;
+# else  // if SANITIZER_WINDOWS
 static const uptr kLinuxShadowMsk = 0x200000000000ULL;
-# endif
+static const uptr kMetaShadow     = 0x300000000000ULL;
+static const uptr kMetaSize       = 0x100000000000ULL;
+# endif  // if SANITIZER_WINDOWS
+#else  // defined(TSAN_GO)
+static const uptr kMetaShadow     = 0x300000000000ULL;
+static const uptr kMetaSize       = 0x100000000000ULL;
 // TSAN_COMPAT_SHADOW is intended for COMPAT virtual memory layout,
 // when memory addresses are of the 0x2axxxxxxxxxx form.
 // The option is enabled with 'setarch x86_64 -L'.
-#elif defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
+# if defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
 static const uptr kLinuxAppMemBeg = 0x290000000000ULL;
 static const uptr kLinuxAppMemEnd = 0x7fffffffffffULL;
 static const uptr kAppMemGapBeg   = 0x2c0000000000ULL;
 static const uptr kAppMemGapEnd   = 0x7d0000000000ULL;
-#else
+# else
 static const uptr kLinuxAppMemBeg = 0x7cf000000000ULL;
 static const uptr kLinuxAppMemEnd = 0x7fffffffffffULL;
+# endif
 #endif
 
 static const uptr kLinuxAppMemMsk = 0x7c0000000000ULL;
@@ -96,10 +111,16 @@
 // This has to be a macro to allow constant initialization of constants below.
 #ifndef TSAN_GO
 #define MemToShadow(addr) \
-    (((addr) & ~(kLinuxAppMemMsk | (kShadowCell - 1))) * kShadowCnt)
+    ((((uptr)addr) & ~(kLinuxAppMemMsk | (kShadowCell - 1))) * kShadowCnt)
+#define MemToMeta(addr) \
+    (u32*)(((((uptr)addr) & ~(kLinuxAppMemMsk | (kMetaShadowCell - 1))) \
+    / kMetaShadowCell * kMetaShadowSize) | kMetaShadow)
 #else
 #define MemToShadow(addr) \
-    ((((addr) & ~(kShadowCell - 1)) * kShadowCnt) | kLinuxShadowMsk)
+    (((((uptr)addr) & ~(kShadowCell - 1)) * kShadowCnt) | kLinuxShadowMsk)
+#define MemToMeta(addr) \
+    (u32*)(((((uptr)addr) & ~(kMetaShadowCell - 1)) \
+    / kMetaShadowCell * kMetaShadowSize) | kMetaShadow)
 #endif
 
 static const uptr kLinuxShadowBeg = MemToShadow(kLinuxAppMemBeg);
@@ -110,6 +131,8 @@
 #if defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
   return (mem >= kLinuxAppMemBeg && mem < kAppMemGapBeg) ||
          (mem >= kAppMemGapEnd   && mem <= kLinuxAppMemEnd);
+#elif defined(TSAN_GO)
+  return mem <= kLinuxAppMemEnd;
 #else
   return mem >= kLinuxAppMemBeg && mem <= kLinuxAppMemEnd;
 #endif
@@ -140,7 +163,7 @@
 }
 
 void FlushShadowMemory();
-void WriteMemoryProfile(char *buf, uptr buf_size);
+void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive);
 uptr GetRSS();
 
 const char *InitializePlatform();
diff --git a/lib/tsan/rtl/tsan_platform_linux.cc b/lib/tsan/rtl/tsan_platform_linux.cc
index 3c3a58b..53ecfc6 100644
--- a/lib/tsan/rtl/tsan_platform_linux.cc
+++ b/lib/tsan/rtl/tsan_platform_linux.cc
@@ -14,7 +14,7 @@
 
 
 #include "sanitizer_common/sanitizer_platform.h"
-#if SANITIZER_LINUX
+#if SANITIZER_LINUX || SANITIZER_FREEBSD
 
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
@@ -32,7 +32,6 @@
 #include <string.h>
 #include <stdarg.h>
 #include <sys/mman.h>
-#include <sys/prctl.h>
 #include <sys/syscall.h>
 #include <sys/socket.h>
 #include <sys/time.h>
@@ -43,9 +42,10 @@
 #include <errno.h>
 #include <sched.h>
 #include <dlfcn.h>
+#if SANITIZER_LINUX
 #define __need_res_state
 #include <resolv.h>
-#include <malloc.h>
+#endif
 
 #ifdef sa_handler
 # undef sa_handler
@@ -55,44 +55,57 @@
 # undef sa_sigaction
 #endif
 
-extern "C" struct mallinfo __libc_mallinfo();
+#if SANITIZER_FREEBSD
+extern "C" void *__libc_stack_end;
+void *__libc_stack_end = 0;
+#endif
 
 namespace __tsan {
 
 const uptr kPageSize = 4096;
 
+enum {
+  MemTotal  = 0,
+  MemShadow = 1,
+  MemMeta   = 2,
+  MemFile   = 3,
+  MemMmap   = 4,
+  MemTrace  = 5,
+  MemHeap   = 6,
+  MemOther  = 7,
+  MemCount  = 8,
+};
+
 void FillProfileCallback(uptr start, uptr rss, bool file,
                          uptr *mem, uptr stats_size) {
-  CHECK_EQ(7, stats_size);
-  mem[6] += rss;  // total
+  mem[MemTotal] += rss;
   start >>= 40;
-  if (start < 0x10)  // shadow
-    mem[0] += rss;
-  else if (start >= 0x20 && start < 0x30)  // compat modules
-    mem[file ? 1 : 2] += rss;
-  else if (start >= 0x7e)  // modules
-    mem[file ? 1 : 2] += rss;
-  else if (start >= 0x60 && start < 0x62)  // traces
-    mem[3] += rss;
-  else if (start >= 0x7d && start < 0x7e)  // heap
-    mem[4] += rss;
-  else  // other
-    mem[5] += rss;
+  if (start < 0x10)
+    mem[MemShadow] += rss;
+  else if (start >= 0x20 && start < 0x30)
+    mem[file ? MemFile : MemMmap] += rss;
+  else if (start >= 0x30 && start < 0x40)
+    mem[MemMeta] += rss;
+  else if (start >= 0x7e)
+    mem[file ? MemFile : MemMmap] += rss;
+  else if (start >= 0x60 && start < 0x62)
+    mem[MemTrace] += rss;
+  else if (start >= 0x7d && start < 0x7e)
+    mem[MemHeap] += rss;
+  else
+    mem[MemOther] += rss;
 }
 
-void WriteMemoryProfile(char *buf, uptr buf_size) {
-  uptr mem[7] = {};
+void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
+  uptr mem[MemCount] = {};
   __sanitizer::GetMemoryProfile(FillProfileCallback, mem, 7);
-  char *buf_pos = buf;
-  char *buf_end = buf + buf_size;
-  buf_pos += internal_snprintf(buf_pos, buf_end - buf_pos,
-      "RSS %zd MB: shadow:%zd file:%zd mmap:%zd trace:%zd heap:%zd other:%zd\n",
-      mem[6] >> 20, mem[0] >> 20, mem[1] >> 20, mem[2] >> 20,
-      mem[3] >> 20, mem[4] >> 20, mem[5] >> 20);
-  struct mallinfo mi = __libc_mallinfo();
-  buf_pos += internal_snprintf(buf_pos, buf_end - buf_pos,
-      "mallinfo: arena=%d mmap=%d fordblks=%d keepcost=%d\n",
-      mi.arena >> 20, mi.hblkhd >> 20, mi.fordblks >> 20, mi.keepcost >> 20);
+  internal_snprintf(buf, buf_size,
+      "RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
+      " trace:%zd heap:%zd other:%zd nthr=%zd/%zd\n",
+      mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20,
+      mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemTrace] >> 20,
+      mem[MemHeap] >> 20, mem[MemOther] >> 20,
+      nlive, nthread);
 }
 
 uptr GetRSS() {
@@ -101,15 +114,18 @@
   return mem[6];
 }
 
-
+#if SANITIZER_LINUX
 void FlushShadowMemoryCallback(
     const SuspendedThreadsList &suspended_threads_list,
     void *argument) {
   FlushUnneededShadowMemory(kLinuxShadowBeg, kLinuxShadowEnd - kLinuxShadowBeg);
 }
+#endif
 
 void FlushShadowMemory() {
+#if SANITIZER_LINUX
   StopTheWorld(FlushShadowMemoryCallback, 0);
+#endif
 }
 
 #ifndef TSAN_GO
@@ -123,9 +139,7 @@
     Die();
   }
 }
-#endif
 
-#ifndef TSAN_GO
 // Mark shadow for .rodata sections with the special kShadowRodata marker.
 // Accesses to .rodata can't race, so this saves time, memory and trace space.
 static void MapRodata() {
@@ -184,6 +198,7 @@
 }
 
 void InitializeShadowMemory() {
+  // Map memory shadow.
   uptr shadow = (uptr)MmapFixedNoReserve(kLinuxShadowBeg,
     kLinuxShadowEnd - kLinuxShadowBeg);
   if (shadow != kLinuxShadowBeg) {
@@ -192,23 +207,48 @@
                "to link with -pie (%p, %p).\n", shadow, kLinuxShadowBeg);
     Die();
   }
+  DPrintf("memory shadow: %zx-%zx (%zuGB)\n",
+      kLinuxShadowBeg, kLinuxShadowEnd,
+      (kLinuxShadowEnd - kLinuxShadowBeg) >> 30);
+
+  // Map meta shadow.
+  if (MemToMeta(kLinuxAppMemBeg) < (u32*)kMetaShadow) {
+    Printf("ThreadSanitizer: bad meta shadow (%p -> %p < %p)\n",
+        kLinuxAppMemBeg, MemToMeta(kLinuxAppMemBeg), kMetaShadow);
+    Die();
+  }
+  if (MemToMeta(kLinuxAppMemEnd) >= (u32*)(kMetaShadow + kMetaSize)) {
+    Printf("ThreadSanitizer: bad meta shadow (%p -> %p >= %p)\n",
+        kLinuxAppMemEnd, MemToMeta(kLinuxAppMemEnd), kMetaShadow + kMetaSize);
+    Die();
+  }
+  uptr meta = (uptr)MmapFixedNoReserve(kMetaShadow, kMetaSize);
+  if (meta != kMetaShadow) {
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and "
+               "to link with -pie (%p, %p).\n", meta, kMetaShadow);
+    Die();
+  }
+  DPrintf("meta shadow: %zx-%zx (%zuGB)\n",
+      kMetaShadow, kMetaShadow + kMetaSize, kMetaSize >> 30);
+
+  // Protect gaps.
   const uptr kClosedLowBeg  = 0x200000;
   const uptr kClosedLowEnd  = kLinuxShadowBeg - 1;
   const uptr kClosedMidBeg = kLinuxShadowEnd + 1;
-  const uptr kClosedMidEnd = min(kLinuxAppMemBeg, kTraceMemBegin);
+  const uptr kClosedMidEnd = min(min(kLinuxAppMemBeg, kTraceMemBegin),
+      kMetaShadow);
+
   ProtectRange(kClosedLowBeg, kClosedLowEnd);
   ProtectRange(kClosedMidBeg, kClosedMidEnd);
-  DPrintf("kClosedLow   %zx-%zx (%zuGB)\n",
+  VPrintf(2, "kClosedLow   %zx-%zx (%zuGB)\n",
       kClosedLowBeg, kClosedLowEnd, (kClosedLowEnd - kClosedLowBeg) >> 30);
-  DPrintf("kLinuxShadow %zx-%zx (%zuGB)\n",
-      kLinuxShadowBeg, kLinuxShadowEnd,
-      (kLinuxShadowEnd - kLinuxShadowBeg) >> 30);
-  DPrintf("kClosedMid   %zx-%zx (%zuGB)\n",
+  VPrintf(2, "kClosedMid   %zx-%zx (%zuGB)\n",
       kClosedMidBeg, kClosedMidEnd, (kClosedMidEnd - kClosedMidBeg) >> 30);
-  DPrintf("kLinuxAppMem %zx-%zx (%zuGB)\n",
+  VPrintf(2, "app mem: %zx-%zx (%zuGB)\n",
       kLinuxAppMemBeg, kLinuxAppMemEnd,
       (kLinuxAppMemEnd - kLinuxAppMemBeg) >> 30);
-  DPrintf("stack        %zx\n", (uptr)&shadow);
+  VPrintf(2, "stack: %zx\n", (uptr)&shadow);
 
   MapRodata();
 }
@@ -330,6 +370,7 @@
 // This is required to properly "close" the fds, because we do not see internal
 // closes within glibc. The code is a pure hack.
 int ExtractResolvFDs(void *state, int *fds, int nfd) {
+#if SANITIZER_LINUX
   int cnt = 0;
   __res_state *statp = (__res_state*)state;
   for (int i = 0; i < MAXNS && cnt < nfd; i++) {
@@ -337,6 +378,9 @@
       fds[cnt++] = statp->_u._ext.nssocks[i];
   }
   return cnt;
+#else
+  return 0;
+#endif
 }
 
 // Extract file descriptors passed via UNIX domain sockets.
diff --git a/lib/tsan/rtl/tsan_platform_mac.cc b/lib/tsan/rtl/tsan_platform_mac.cc
index a545884..15d0688 100644
--- a/lib/tsan/rtl/tsan_platform_mac.cc
+++ b/lib/tsan/rtl/tsan_platform_mac.cc
@@ -47,7 +47,7 @@
 void FlushShadowMemory() {
 }
 
-void WriteMemoryProfile(char *buf, uptr buf_size) {
+void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
 }
 
 uptr GetRSS() {
diff --git a/lib/tsan/rtl/tsan_platform_windows.cc b/lib/tsan/rtl/tsan_platform_windows.cc
index efc5d78..8b9d20e 100644
--- a/lib/tsan/rtl/tsan_platform_windows.cc
+++ b/lib/tsan/rtl/tsan_platform_windows.cc
@@ -28,7 +28,7 @@
 void FlushShadowMemory() {
 }
 
-void WriteMemoryProfile(char *buf, uptr buf_size) {
+void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive) {
 }
 
 uptr GetRSS() {
diff --git a/lib/tsan/rtl/tsan_report.cc b/lib/tsan/rtl/tsan_report.cc
index a835cee..e14d0b9 100644
--- a/lib/tsan/rtl/tsan_report.cc
+++ b/lib/tsan/rtl/tsan_report.cc
@@ -17,9 +17,9 @@
 
 namespace __tsan {
 
-class Decorator: private __sanitizer::AnsiColorDecorator {
+class Decorator: public __sanitizer::SanitizerCommonDecorator {
  public:
-  Decorator() : __sanitizer::AnsiColorDecorator(PrintsToTtyCached()) { }
+  Decorator() : SanitizerCommonDecorator() { }
   const char *Warning()    { return Red(); }
   const char *EndWarning() { return Default(); }
   const char *Access()     { return Blue(); }
diff --git a/lib/tsan/rtl/tsan_report.h b/lib/tsan/rtl/tsan_report.h
index 3817bcd..8ea9774 100644
--- a/lib/tsan/rtl/tsan_report.h
+++ b/lib/tsan/rtl/tsan_report.h
@@ -42,6 +42,7 @@
   char *file;
   int line;
   int col;
+  bool suppressable;
 };
 
 struct ReportMopMutex {
@@ -80,6 +81,7 @@
   char *name;
   char *file;
   int line;
+  bool suppressable;
   ReportStack *stack;
 };
 
diff --git a/lib/tsan/rtl/tsan_rtl.cc b/lib/tsan/rtl/tsan_rtl.cc
index 39e78c0..3e3e339 100644
--- a/lib/tsan/rtl/tsan_rtl.cc
+++ b/lib/tsan/rtl/tsan_rtl.cc
@@ -25,6 +25,16 @@
 #include "tsan_suppressions.h"
 #include "tsan_symbolize.h"
 
+#ifdef __SSE3__
+// <emmintrin.h> transitively includes <stdlib.h>,
+// and it's prohibited to include std headers into tsan runtime.
+// So we do this dirty trick.
+#define _MM_MALLOC_H_INCLUDED
+#define __MM_MALLOC_H
+#include <emmintrin.h>
+typedef __m128i m128;
+#endif
+
 volatile int __tsan_resumed = 0;
 
 extern "C" void __tsan_resume() {
@@ -112,10 +122,7 @@
   uptr n_running_threads;
   ctx->thread_registry->GetNumberOfThreads(&n_threads, &n_running_threads);
   InternalScopedBuffer<char> buf(4096);
-  internal_snprintf(buf.data(), buf.size(), "%d: nthr=%d nlive=%d\n",
-      i, n_threads, n_running_threads);
-  internal_write(fd, buf.data(), internal_strlen(buf.data()));
-  WriteMemoryProfile(buf.data(), buf.size());
+  WriteMemoryProfile(buf.data(), buf.size(), n_threads, n_running_threads);
   internal_write(fd, buf.data(), internal_strlen(buf.data()));
 }
 
@@ -131,19 +138,26 @@
 
   fd_t mprof_fd = kInvalidFd;
   if (flags()->profile_memory && flags()->profile_memory[0]) {
-    InternalScopedBuffer<char> filename(4096);
-    internal_snprintf(filename.data(), filename.size(), "%s.%d",
-        flags()->profile_memory, (int)internal_getpid());
-    uptr openrv = OpenFile(filename.data(), true);
-    if (internal_iserror(openrv)) {
-      Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
-          &filename[0]);
+    if (internal_strcmp(flags()->profile_memory, "stdout") == 0) {
+      mprof_fd = 1;
+    } else if (internal_strcmp(flags()->profile_memory, "stderr") == 0) {
+      mprof_fd = 2;
     } else {
-      mprof_fd = openrv;
+      InternalScopedBuffer<char> filename(4096);
+      internal_snprintf(filename.data(), filename.size(), "%s.%d",
+          flags()->profile_memory, (int)internal_getpid());
+      uptr openrv = OpenFile(filename.data(), true);
+      if (internal_iserror(openrv)) {
+        Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
+            &filename[0]);
+      } else {
+        mprof_fd = openrv;
+      }
     }
   }
 
   u64 last_flush = NanoTime();
+  u64 last_rss_check = NanoTime();
   uptr last_rss = 0;
   for (int i = 0;
       atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
@@ -160,7 +174,9 @@
         last_flush = NanoTime();
       }
     }
-    if (flags()->memory_limit_mb > 0) {
+    // GetRSS can be expensive on huge programs, so don't do it every 100ms.
+    if (flags()->memory_limit_mb > 0 && last_rss_check + 1000 * kMs2Ns < now) {
+      last_rss_check = now;
       uptr rss = GetRSS();
       uptr limit = uptr(flags()->memory_limit_mb) << 20;
       if (flags()->verbosity > 0) {
@@ -222,6 +238,32 @@
   // so we can get away with unaligned mapping.
   // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
   MmapFixedNoReserve(MemToShadow(addr), size * kShadowMultiplier);
+
+  // Meta shadow is 2:1, so tread carefully.
+  static bool data_mapped = false;
+  static uptr mapped_meta_end = 0;
+  uptr meta_begin = (uptr)MemToMeta(addr);
+  uptr meta_end = (uptr)MemToMeta(addr + size);
+  meta_begin = RoundDownTo(meta_begin, 64 << 10);
+  meta_end = RoundUpTo(meta_end, 64 << 10);
+  if (!data_mapped) {
+    // First call maps data+bss.
+    data_mapped = true;
+    MmapFixedNoReserve(meta_begin, meta_end - meta_begin);
+  } else {
+    // Mapping continuous heap.
+    // Windows wants 64K alignment.
+    meta_begin = RoundDownTo(meta_begin, 64 << 10);
+    meta_end = RoundUpTo(meta_end, 64 << 10);
+    if (meta_end <= mapped_meta_end)
+      return;
+    if (meta_begin < mapped_meta_end)
+      meta_begin = mapped_meta_end;
+    MmapFixedNoReserve(meta_begin, meta_end - meta_begin);
+    mapped_meta_end = meta_end;
+  }
+  VPrintf(2, "mapped meta shadow for (%p-%p) at (%p-%p)\n",
+      addr, addr+size, meta_begin, meta_end);
 }
 
 void MapThreadTrace(uptr addr, uptr size) {
@@ -378,16 +420,37 @@
 }
 #endif
 
+#ifdef TSAN_GO
+NOINLINE
+void GrowShadowStack(ThreadState *thr) {
+  const int sz = thr->shadow_stack_end - thr->shadow_stack;
+  const int newsz = 2 * sz;
+  uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
+      newsz * sizeof(uptr));
+  internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
+  internal_free(thr->shadow_stack);
+  thr->shadow_stack = newstack;
+  thr->shadow_stack_pos = newstack + sz;
+  thr->shadow_stack_end = newstack + newsz;
+}
+#endif
+
 u32 CurrentStackId(ThreadState *thr, uptr pc) {
   if (thr->shadow_stack_pos == 0)  // May happen during bootstrap.
     return 0;
-  if (pc) {
+  if (pc != 0) {
+#ifndef TSAN_GO
+    DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
+#else
+    if (thr->shadow_stack_pos == thr->shadow_stack_end)
+      GrowShadowStack(thr);
+#endif
     thr->shadow_stack_pos[0] = pc;
     thr->shadow_stack_pos++;
   }
   u32 id = StackDepotPut(thr->shadow_stack,
                          thr->shadow_stack_pos - thr->shadow_stack);
-  if (pc)
+  if (pc != 0)
     thr->shadow_stack_pos--;
   return id;
 }
@@ -449,7 +512,8 @@
   *s = 0;
 }
 
-static inline void HandleRace(ThreadState *thr, u64 *shadow_mem,
+ALWAYS_INLINE
+void HandleRace(ThreadState *thr, u64 *shadow_mem,
                               Shadow cur, Shadow old) {
   thr->racy_state[0] = cur.raw();
   thr->racy_state[1] = old.raw();
@@ -461,16 +525,12 @@
 #endif
 }
 
-static inline bool OldIsInSameSynchEpoch(Shadow old, ThreadState *thr) {
-  return old.epoch() >= thr->fast_synch_epoch;
-}
-
 static inline bool HappensBefore(Shadow old, ThreadState *thr) {
   return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
 }
 
-ALWAYS_INLINE USED
-void MemoryAccessImpl(ThreadState *thr, uptr addr,
+ALWAYS_INLINE
+void MemoryAccessImpl1(ThreadState *thr, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
     u64 *shadow_mem, Shadow cur) {
   StatInc(thr, StatMop);
@@ -564,6 +624,90 @@
   }
 }
 
+ALWAYS_INLINE
+bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+  Shadow cur(a);
+  for (uptr i = 0; i < kShadowCnt; i++) {
+    Shadow old(LoadShadow(&s[i]));
+    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
+        old.TidWithIgnore() == cur.TidWithIgnore() &&
+        old.epoch() > sync_epoch &&
+        old.IsAtomic() == cur.IsAtomic() &&
+        old.IsRead() <= cur.IsRead())
+      return true;
+  }
+  return false;
+}
+
+#if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
+#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
+    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
+    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+ALWAYS_INLINE
+bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+  // This is an optimized version of ContainsSameAccessSlow.
+  // load current access into access[0:63]
+  const m128 access     = _mm_cvtsi64_si128(a);
+  // duplicate high part of access in addr0:
+  // addr0[0:31]        = access[32:63]
+  // addr0[32:63]       = access[32:63]
+  // addr0[64:95]       = access[32:63]
+  // addr0[96:127]      = access[32:63]
+  const m128 addr0      = SHUF(access, access, 1, 1, 1, 1);
+  // load 4 shadow slots
+  const m128 shadow0    = _mm_load_si128((__m128i*)s);
+  const m128 shadow1    = _mm_load_si128((__m128i*)s + 1);
+  // load high parts of 4 shadow slots into addr_vect:
+  // addr_vect[0:31]    = shadow0[32:63]
+  // addr_vect[32:63]   = shadow0[96:127]
+  // addr_vect[64:95]   = shadow1[32:63]
+  // addr_vect[96:127]  = shadow1[96:127]
+  m128 addr_vect        = SHUF(shadow0, shadow1, 1, 3, 1, 3);
+  if (!is_write) {
+    // set IsRead bit in addr_vect
+    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
+    const m128 rw_mask  = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
+    addr_vect           = _mm_or_si128(addr_vect, rw_mask);
+  }
+  // addr0 == addr_vect?
+  const m128 addr_res   = _mm_cmpeq_epi32(addr0, addr_vect);
+  // epoch1[0:63]       = sync_epoch
+  const m128 epoch1     = _mm_cvtsi64_si128(sync_epoch);
+  // epoch[0:31]        = sync_epoch[0:31]
+  // epoch[32:63]       = sync_epoch[0:31]
+  // epoch[64:95]       = sync_epoch[0:31]
+  // epoch[96:127]      = sync_epoch[0:31]
+  const m128 epoch      = SHUF(epoch1, epoch1, 0, 0, 0, 0);
+  // load low parts of shadow cell epochs into epoch_vect:
+  // epoch_vect[0:31]   = shadow0[0:31]
+  // epoch_vect[32:63]  = shadow0[64:95]
+  // epoch_vect[64:95]  = shadow1[0:31]
+  // epoch_vect[96:127] = shadow1[64:95]
+  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
+  // epoch_vect > sync_epoch?
+  const m128 epoch_res  = _mm_cmpgt_epi32(epoch_vect, epoch);
+  // addr_res & epoch_res
+  const m128 res        = _mm_and_si128(addr_res, epoch_res);
+  // mask[0] = res[7]
+  // mask[1] = res[15]
+  // ...
+  // mask[15] = res[127]
+  const int mask        = _mm_movemask_epi8(res);
+  return mask != 0;
+}
+#endif
+
+ALWAYS_INLINE
+bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+#if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
+  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
+  DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
+  return res;
+#else
+  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+#endif
+}
+
 ALWAYS_INLINE USED
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
@@ -585,7 +729,7 @@
   }
 #endif
 
-  if (*shadow_mem == kShadowRodata) {
+  if (kCppMode && *shadow_mem == kShadowRodata) {
     // Access to .rodata section, no races here.
     // Measurements show that it can be 10-20% of all memory accesses.
     StatInc(thr, StatMop);
@@ -596,14 +740,12 @@
   }
 
   FastState fast_state = thr->fast_state;
-  if (fast_state.GetIgnoreBit())
+  if (fast_state.GetIgnoreBit()) {
+    StatInc(thr, StatMop);
+    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+    StatInc(thr, StatMopIgnored);
     return;
-  if (kCollectHistory) {
-    fast_state.IncrementEpoch();
-    thr->fast_state = fast_state;
-    // We must not store to the trace if we do not store to the shadow.
-    // That is, this call must be moved somewhere below.
-    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
   }
 
   Shadow cur(fast_state);
@@ -611,7 +753,41 @@
   cur.SetWrite(kAccessIsWrite);
   cur.SetAtomic(kIsAtomic);
 
-  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
+      thr->fast_synch_epoch, kAccessIsWrite))) {
+    StatInc(thr, StatMop);
+    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+    StatInc(thr, StatMopSame);
+    return;
+  }
+
+  if (kCollectHistory) {
+    fast_state.IncrementEpoch();
+    thr->fast_state = fast_state;
+    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+    cur.IncrementEpoch();
+  }
+
+  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+      shadow_mem, cur);
+}
+
+// Called by MemoryAccessRange in tsan_rtl_thread.cc
+ALWAYS_INLINE USED
+void MemoryAccessImpl(ThreadState *thr, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
+    u64 *shadow_mem, Shadow cur) {
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
+      thr->fast_synch_epoch, kAccessIsWrite))) {
+    StatInc(thr, StatMop);
+    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+    StatInc(thr, StatMopSame);
+    return;
+  }
+
+  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
       shadow_mem, cur);
 }
 
@@ -729,17 +905,8 @@
 #ifndef TSAN_GO
   DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
 #else
-  if (thr->shadow_stack_pos == thr->shadow_stack_end) {
-    const int sz = thr->shadow_stack_end - thr->shadow_stack;
-    const int newsz = 2 * sz;
-    uptr *newstack = (uptr*)internal_alloc(MBlockShadowStack,
-        newsz * sizeof(uptr));
-    internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
-    internal_free(thr->shadow_stack);
-    thr->shadow_stack = newstack;
-    thr->shadow_stack_pos = newstack + sz;
-    thr->shadow_stack_end = newstack + newsz;
-  }
+  if (thr->shadow_stack_pos == thr->shadow_stack_end)
+    GrowShadowStack(thr);
 #endif
   thr->shadow_stack_pos[0] = pc;
   thr->shadow_stack_pos++;
diff --git a/lib/tsan/rtl/tsan_rtl.h b/lib/tsan/rtl/tsan_rtl.h
index 4364ef8..1b590b8 100644
--- a/lib/tsan/rtl/tsan_rtl.h
+++ b/lib/tsan/rtl/tsan_rtl.h
@@ -44,6 +44,7 @@
 #include "tsan_platform.h"
 #include "tsan_mutexset.h"
 #include "tsan_ignoreset.h"
+#include "tsan_stack_trace.h"
 
 #if SANITIZER_WORDSIZE != 64
 # error "ThreadSanitizer is supported only on 64-bit platforms"
@@ -51,77 +52,6 @@
 
 namespace __tsan {
 
-// Descriptor of user's memory block.
-struct MBlock {
-  /*
-  u64 mtx : 1;  // must be first
-  u64 lst : 44;
-  u64 stk : 31;  // on word boundary
-  u64 tid : kTidBits;
-  u64 siz : 128 - 1 - 31 - 44 - kTidBits;  // 39
-  */
-  u64 raw[2];
-
-  void Init(uptr siz, u32 tid, u32 stk) {
-    raw[0] = raw[1] = 0;
-    raw[1] |= (u64)siz << ((1 + 44 + 31 + kTidBits) % 64);
-    raw[1] |= (u64)tid << ((1 + 44 + 31) % 64);
-    raw[0] |= (u64)stk << (1 + 44);
-    raw[1] |= (u64)stk >> (64 - 44 - 1);
-    DCHECK_EQ(Size(), siz);
-    DCHECK_EQ(Tid(), tid);
-    DCHECK_EQ(StackId(), stk);
-  }
-
-  u32 Tid() const {
-    return GetLsb(raw[1] >> ((1 + 44 + 31) % 64), kTidBits);
-  }
-
-  uptr Size() const {
-    return raw[1] >> ((1 + 31 + 44 + kTidBits) % 64);
-  }
-
-  u32 StackId() const {
-    return (raw[0] >> (1 + 44)) | GetLsb(raw[1] << (64 - 44 - 1), 31);
-  }
-
-  SyncVar *ListHead() const {
-    return (SyncVar*)(GetLsb(raw[0] >> 1, 44) << 3);
-  }
-
-  void ListPush(SyncVar *v) {
-    SyncVar *lst = ListHead();
-    v->next = lst;
-    u64 x = (u64)v ^ (u64)lst;
-    x = (x >> 3) << 1;
-    raw[0] ^= x;
-    DCHECK_EQ(ListHead(), v);
-  }
-
-  SyncVar *ListPop() {
-    SyncVar *lst = ListHead();
-    SyncVar *nxt = lst->next;
-    lst->next = 0;
-    u64 x = (u64)lst ^ (u64)nxt;
-    x = (x >> 3) << 1;
-    raw[0] ^= x;
-    DCHECK_EQ(ListHead(), nxt);
-    return lst;
-  }
-
-  void ListReset() {
-    SyncVar *lst = ListHead();
-    u64 x = (u64)lst;
-    x = (x >> 3) << 1;
-    raw[0] ^= x;
-    DCHECK_EQ(ListHead(), 0);
-  }
-
-  void Lock();
-  void Unlock();
-  typedef GenericScopedLock<MBlock> ScopedLock;
-};
-
 #ifndef TSAN_GO
 #if defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
 const uptr kAllocatorSpace = 0x7d0000000000ULL;
@@ -131,7 +61,7 @@
 const uptr kAllocatorSize  =  0x10000000000ULL;  // 1T.
 
 struct MapUnmapCallback;
-typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, sizeof(MBlock),
+typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, 0,
     DefaultSizeClassMap, MapUnmapCallback> PrimaryAllocator;
 typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
 typedef LargeMmapAllocator<MapUnmapCallback> SecondaryAllocator;
@@ -148,14 +78,14 @@
 // FastState (from most significant bit):
 //   ignore          : 1
 //   tid             : kTidBits
-//   epoch           : kClkBits
 //   unused          : -
 //   history_size    : 3
+//   epoch           : kClkBits
 class FastState {
  public:
   FastState(u64 tid, u64 epoch) {
     x_ = tid << kTidShift;
-    x_ |= epoch << kClkShift;
+    x_ |= epoch;
     DCHECK_EQ(tid, this->tid());
     DCHECK_EQ(epoch, this->epoch());
     DCHECK_EQ(GetIgnoreBit(), false);
@@ -180,13 +110,13 @@
   }
 
   u64 epoch() const {
-    u64 res = (x_ << (kTidBits + 1)) >> (64 - kClkBits);
+    u64 res = x_ & ((1ull << kClkBits) - 1);
     return res;
   }
 
   void IncrementEpoch() {
     u64 old_epoch = epoch();
-    x_ += 1 << kClkShift;
+    x_ += 1;
     DCHECK_EQ(old_epoch + 1, epoch());
     (void)old_epoch;
   }
@@ -198,17 +128,19 @@
   void SetHistorySize(int hs) {
     CHECK_GE(hs, 0);
     CHECK_LE(hs, 7);
-    x_ = (x_ & ~7) | hs;
+    x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
   }
 
+  ALWAYS_INLINE
   int GetHistorySize() const {
-    return (int)(x_ & 7);
+    return (int)((x_ >> kHistoryShift) & kHistoryMask);
   }
 
   void ClearHistorySize() {
-    x_ &= ~7;
+    SetHistorySize(0);
   }
 
+  ALWAYS_INLINE
   u64 GetTracePos() const {
     const int hs = GetHistorySize();
     // When hs == 0, the trace consists of 2 parts.
@@ -219,20 +151,21 @@
  private:
   friend class Shadow;
   static const int kTidShift = 64 - kTidBits - 1;
-  static const int kClkShift = kTidShift - kClkBits;
   static const u64 kIgnoreBit = 1ull << 63;
   static const u64 kFreedBit = 1ull << 63;
+  static const u64 kHistoryShift = kClkBits;
+  static const u64 kHistoryMask = 7;
   u64 x_;
 };
 
 // Shadow (from most significant bit):
 //   freed           : 1
 //   tid             : kTidBits
-//   epoch           : kClkBits
 //   is_atomic       : 1
 //   is_read         : 1
 //   size_log        : 2
 //   addr0           : 3
+//   epoch           : kClkBits
 class Shadow : public FastState {
  public:
   explicit Shadow(u64 x)
@@ -245,10 +178,10 @@
   }
 
   void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
-    DCHECK_EQ(x_ & 31, 0);
+    DCHECK_EQ((x_ >> kClkBits) & 31, 0);
     DCHECK_LE(addr0, 7);
     DCHECK_LE(kAccessSizeLog, 3);
-    x_ |= (kAccessSizeLog << 3) | addr0;
+    x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
     DCHECK_EQ(kAccessSizeLog, size_log());
     DCHECK_EQ(addr0, this->addr0());
   }
@@ -281,47 +214,34 @@
     return shifted_xor == 0;
   }
 
-  static inline bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
-    u64 masked_xor = (s1.x_ ^ s2.x_) & 31;
+  static ALWAYS_INLINE
+  bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
+    u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
     return masked_xor == 0;
   }
 
-  static inline bool TwoRangesIntersect(Shadow s1, Shadow s2,
+  static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
       unsigned kS2AccessSize) {
     bool res = false;
     u64 diff = s1.addr0() - s2.addr0();
     if ((s64)diff < 0) {  // s1.addr0 < s2.addr0  // NOLINT
       // if (s1.addr0() + size1) > s2.addr0()) return true;
-      if (s1.size() > -diff)  res = true;
+      if (s1.size() > -diff)
+        res = true;
     } else {
       // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
-      if (kS2AccessSize > diff) res = true;
+      if (kS2AccessSize > diff)
+        res = true;
     }
-    DCHECK_EQ(res, TwoRangesIntersectSLOW(s1, s2));
-    DCHECK_EQ(res, TwoRangesIntersectSLOW(s2, s1));
+    DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
+    DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
     return res;
   }
 
-  // The idea behind the offset is as follows.
-  // Consider that we have 8 bool's contained within a single 8-byte block
-  // (mapped to a single shadow "cell"). Now consider that we write to the bools
-  // from a single thread (which we consider the common case).
-  // W/o offsetting each access will have to scan 4 shadow values at average
-  // to find the corresponding shadow value for the bool.
-  // With offsetting we start scanning shadow with the offset so that
-  // each access hits necessary shadow straight off (at least in an expected
-  // optimistic case).
-  // This logic works seamlessly for any layout of user data. For example,
-  // if user data is {int, short, char, char}, then accesses to the int are
-  // offsetted to 0, short - 4, 1st char - 6, 2nd char - 7. Hopefully, accesses
-  // from a single thread won't need to scan all 8 shadow values.
-  unsigned ComputeSearchOffset() {
-    return x_ & 7;
-  }
-  u64 addr0() const { return x_ & 7; }
-  u64 size() const { return 1ull << size_log(); }
-  bool IsWrite() const { return !IsRead(); }
-  bool IsRead() const { return x_ & kReadBit; }
+  u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
+  u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
+  bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
+  bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
 
   // The idea behind the freed bit is as follows.
   // When the memory is freed (or otherwise unaccessible) we write to the shadow
@@ -346,15 +266,14 @@
     return res;
   }
 
-  bool IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
-    // analyzes 5-th bit (is_read) and 6-th bit (is_atomic)
-    bool v = x_ & u64(((kIsWrite ^ 1) << kReadShift)
-        | (kIsAtomic << kAtomicShift));
+  bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
+    bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift)
+        | (u64(kIsAtomic) << kAtomicShift));
     DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
     return v;
   }
 
-  bool IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
+  bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
     bool v = ((x_ >> kReadShift) & 3)
         <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
     DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
@@ -362,7 +281,7 @@
     return v;
   }
 
-  bool IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
+  bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
     bool v = ((x_ >> kReadShift) & 3)
         >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
     DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
@@ -371,14 +290,14 @@
   }
 
  private:
-  static const u64 kReadShift   = 5;
+  static const u64 kReadShift   = 5 + kClkBits;
   static const u64 kReadBit     = 1ull << kReadShift;
-  static const u64 kAtomicShift = 6;
+  static const u64 kAtomicShift = 6 + kClkBits;
   static const u64 kAtomicBit   = 1ull << kAtomicShift;
 
-  u64 size_log() const { return (x_ >> 3) & 3; }
+  u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }
 
-  static bool TwoRangesIntersectSLOW(const Shadow s1, const Shadow s2) {
+  static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
     if (s1.addr0() == s2.addr0()) return true;
     if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
       return true;
@@ -457,6 +376,9 @@
   bool in_signal_handler;
   SignalContext *signal_ctx;
 
+  DenseSlabAllocCache block_cache;
+  DenseSlabAllocCache sync_cache;
+
 #ifndef TSAN_GO
   u32 last_sleep_stack_id;
   ThreadClock last_sleep_clock;
@@ -530,7 +452,7 @@
   bool initialized;
   bool after_multithreaded_fork;
 
-  SyncTab synctab;
+  MetaMap metamap;
 
   Mutex report_mtx;
   int nreported;
@@ -576,11 +498,11 @@
   explicit ScopedReport(ReportType typ);
   ~ScopedReport();
 
-  void AddStack(const StackTrace *stack);
   void AddMemoryAccess(uptr addr, Shadow s, const StackTrace *stack,
                        const MutexSet *mset);
-  void AddThread(const ThreadContext *tctx);
-  void AddThread(int unique_tid);
+  void AddStack(const StackTrace *stack, bool suppressable = false);
+  void AddThread(const ThreadContext *tctx, bool suppressable = false);
+  void AddThread(int unique_tid, bool suppressable = false);
   void AddUniqueTid(int unique_tid);
   void AddMutex(const SyncVar *s);
   u64 AddMutex(u64 id);
@@ -628,11 +550,7 @@
 void ForkChildAfter(ThreadState *thr, uptr pc);
 
 void ReportRace(ThreadState *thr);
-bool OutputReport(Context *ctx,
-                  const ScopedReport &srep,
-                  const ReportStack *suppress_stack1,
-                  const ReportStack *suppress_stack2 = 0,
-                  const ReportLocation *suppress_loc = 0);
+bool OutputReport(ThreadState *thr, const ScopedReport &srep);
 bool IsFiredSuppression(Context *ctx,
                         const ScopedReport &srep,
                         const StackTrace &trace);
@@ -661,9 +579,8 @@
 void Initialize(ThreadState *thr);
 int Finalize(ThreadState *thr);
 
-SyncVar* GetJavaSync(ThreadState *thr, uptr pc, uptr addr,
-                     bool write_lock, bool create);
-SyncVar* GetAndRemoveJavaSync(ThreadState *thr, uptr pc, uptr addr);
+void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write);
+void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write);
 
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic);
diff --git a/lib/tsan/rtl/tsan_rtl_mutex.cc b/lib/tsan/rtl/tsan_rtl_mutex.cc
index 650cd62..4cf47ec 100644
--- a/lib/tsan/rtl/tsan_rtl_mutex.cc
+++ b/lib/tsan/rtl/tsan_rtl_mutex.cc
@@ -52,14 +52,18 @@
 
 static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
     uptr addr, u64 mid) {
+  // In Go, these misuses are either impossible, or detected by std lib,
+  // or false positives (e.g. unlock in a different thread).
+  if (kGoMode)
+    return;
   ThreadRegistryLock l(ctx->thread_registry);
   ScopedReport rep(typ);
   rep.AddMutex(mid);
   StackTrace trace;
   trace.ObtainCurrent(thr, pc);
-  rep.AddStack(&trace);
+  rep.AddStack(&trace, true);
   rep.AddLocation(addr, 1);
-  OutputReport(ctx, rep, rep.GetReport()->stacks[0]);
+  OutputReport(thr, rep);
 }
 
 void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
@@ -72,10 +76,12 @@
     MemoryWrite(thr, pc, addr, kSizeLog1);
     thr->is_freeing = false;
   }
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   s->is_rw = rw;
   s->is_recursive = recursive;
   s->is_linker_init = linker_init;
+  if (kCppMode && s->creation_stack_id == 0)
+    s->creation_stack_id = CurrentStackId(thr, pc);
   s->mtx.Unlock();
 }
 
@@ -88,37 +94,54 @@
   if (IsGlobalVar(addr))
     return;
 #endif
-  SyncVar *s = ctx->synctab.GetAndRemove(thr, pc, addr);
-  if (s == 0)
-    return;
-  if (flags()->detect_deadlocks) {
-    Callback cb(thr, pc);
-    ctx->dd->MutexDestroy(&cb, &s->dd);
-  }
   if (IsAppMem(addr)) {
     CHECK(!thr->is_freeing);
     thr->is_freeing = true;
     MemoryWrite(thr, pc, addr, kSizeLog1);
     thr->is_freeing = false;
   }
+  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
+  if (s == 0)
+    return;
+  if (flags()->detect_deadlocks) {
+    Callback cb(thr, pc);
+    ctx->dd->MutexDestroy(&cb, &s->dd);
+    ctx->dd->MutexInit(&cb, &s->dd);
+  }
+  bool unlock_locked = false;
   if (flags()->report_destroy_locked
       && s->owner_tid != SyncVar::kInvalidTid
       && !s->is_broken) {
     s->is_broken = true;
+    unlock_locked = true;
+  }
+  u64 mid = s->GetId();
+  u32 last_lock = s->last_lock;
+  if (!unlock_locked)
+    s->Reset();  // must not reset it before the report is printed
+  s->mtx.Unlock();
+  if (unlock_locked) {
     ThreadRegistryLock l(ctx->thread_registry);
     ScopedReport rep(ReportTypeMutexDestroyLocked);
-    rep.AddMutex(s);
+    rep.AddMutex(mid);
     StackTrace trace;
     trace.ObtainCurrent(thr, pc);
     rep.AddStack(&trace);
-    FastState last(s->last_lock);
+    FastState last(last_lock);
     RestoreStack(last.tid(), last.epoch(), &trace, 0);
-    rep.AddStack(&trace);
-    rep.AddLocation(s->addr, 1);
-    OutputReport(ctx, rep, rep.GetReport()->stacks[0]);
+    rep.AddStack(&trace, true);
+    rep.AddLocation(addr, 1);
+    OutputReport(thr, rep);
   }
-  thr->mset.Remove(s->GetId());
-  DestroyAndFree(s);
+  if (unlock_locked) {
+    SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
+    if (s != 0) {
+      s->Reset();
+      s->mtx.Unlock();
+    }
+  }
+  thr->mset.Remove(mid);
+  // s will be destroyed and freed in MetaMap::FreeBlock.
 }
 
 void MutexLock(ThreadState *thr, uptr pc, uptr addr, int rec, bool try_lock) {
@@ -126,7 +149,7 @@
   CHECK_GT(rec, 0);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
   bool report_double_lock = false;
@@ -149,7 +172,7 @@
   }
   s->recursion += rec;
   thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
-  if (flags()->detect_deadlocks && s->recursion == 1) {
+  if (flags()->detect_deadlocks && (s->recursion - rec) == 0) {
     Callback cb(thr, pc);
     if (!try_lock)
       ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
@@ -170,12 +193,12 @@
   DPrintf("#%d: MutexUnlock %zx all=%d\n", thr->tid, addr, all);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
   int rec = 0;
   bool report_bad_unlock = false;
-  if (s->recursion == 0 || s->owner_tid != thr->tid) {
+  if (kCppMode && (s->recursion == 0 || s->owner_tid != thr->tid)) {
     if (flags()->report_mutex_bugs && !s->is_broken) {
       s->is_broken = true;
       report_bad_unlock = true;
@@ -192,7 +215,7 @@
     }
   }
   thr->mset.Del(s->GetId(), true);
-  if (flags()->detect_deadlocks && s->recursion == 0) {
+  if (flags()->detect_deadlocks && s->recursion == 0 && !report_bad_unlock) {
     Callback cb(thr, pc);
     ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true);
   }
@@ -201,7 +224,7 @@
   // Can't touch s after this point.
   if (report_bad_unlock)
     ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
-  if (flags()->detect_deadlocks) {
+  if (flags()->detect_deadlocks && !report_bad_unlock) {
     Callback cb(thr, pc);
     ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
   }
@@ -213,7 +236,7 @@
   StatInc(thr, StatMutexReadLock);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, false);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
   bool report_bad_lock = false;
@@ -248,7 +271,7 @@
   StatInc(thr, StatMutexReadUnlock);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
   bool report_bad_unlock = false;
@@ -279,7 +302,7 @@
   DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr);
   if (IsAppMem(addr))
     MemoryReadAtomic(thr, pc, addr, kSizeLog1);
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   bool write = true;
   bool report_bad_unlock = false;
   if (s->owner_tid == SyncVar::kInvalidTid) {
@@ -324,7 +347,7 @@
 
 void MutexRepair(ThreadState *thr, uptr pc, uptr addr) {
   DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr);
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   s->owner_tid = SyncVar::kInvalidTid;
   s->recursion = 0;
   s->mtx.Unlock();
@@ -334,7 +357,7 @@
   DPrintf("#%d: Acquire %zx\n", thr->tid, addr);
   if (thr->ignore_sync)
     return;
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, false);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, false);
   AcquireImpl(thr, pc, &s->clock);
   s->mtx.ReadUnlock();
 }
@@ -361,7 +384,7 @@
   DPrintf("#%d: Release %zx\n", thr->tid, addr);
   if (thr->ignore_sync)
     return;
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   // Can't increment epoch w/o writing to the trace as well.
   TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
@@ -373,7 +396,7 @@
   DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr);
   if (thr->ignore_sync)
     return;
-  SyncVar *s = ctx->synctab.GetOrCreateAndLock(thr, pc, addr, true);
+  SyncVar *s = ctx->metamap.GetOrCreateAndLock(thr, pc, addr, true);
   thr->fast_state.IncrementEpoch();
   // Can't increment epoch w/o writing to the trace as well.
   TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
@@ -448,7 +471,7 @@
     rep.AddUniqueTid((int)r->loop[i].thr_ctx);
     rep.AddThread((int)r->loop[i].thr_ctx);
   }
-  StackTrace stacks[2 * DDReport::kMaxLoopSize];
+  InternalScopedBuffer<StackTrace> stacks(2 * DDReport::kMaxLoopSize);
   uptr dummy_pc = 0x42;
   for (int i = 0; i < r->n; i++) {
     uptr size;
@@ -462,12 +485,10 @@
         // but we should still produce some stack trace in the report.
         stacks[i].Init(&dummy_pc, 1);
       }
-      rep.AddStack(&stacks[i]);
+      rep.AddStack(&stacks[i], true);
     }
   }
-  // FIXME: use all stacks for suppressions, not just the second stack of the
-  // first edge.
-  OutputReport(ctx, rep, rep.GetReport()->stacks[0]);
+  OutputReport(thr, rep);
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_rtl_report.cc b/lib/tsan/rtl/tsan_rtl_report.cc
index 52edb5a..b75c319 100644
--- a/lib/tsan/rtl/tsan_rtl_report.cc
+++ b/lib/tsan/rtl/tsan_rtl_report.cc
@@ -162,9 +162,10 @@
   DestroyAndFree(rep_);
 }
 
-void ScopedReport::AddStack(const StackTrace *stack) {
+void ScopedReport::AddStack(const StackTrace *stack, bool suppressable) {
   ReportStack **rs = rep_->stacks.PushBack();
   *rs = SymbolizeStack(*stack);
+  (*rs)->suppressable = suppressable;
 }
 
 void ScopedReport::AddMemoryAccess(uptr addr, Shadow s,
@@ -178,6 +179,8 @@
   mop->write = s.IsWrite();
   mop->atomic = s.IsAtomic();
   mop->stack = SymbolizeStack(*stack);
+  if (mop->stack)
+    mop->stack->suppressable = true;
   for (uptr i = 0; i < mset->Size(); i++) {
     MutexSet::Desc d = mset->Get(i);
     u64 mid = this->AddMutex(d.id);
@@ -190,7 +193,7 @@
   rep_->unique_tids.PushBack(unique_tid);
 }
 
-void ScopedReport::AddThread(const ThreadContext *tctx) {
+void ScopedReport::AddThread(const ThreadContext *tctx, bool suppressable) {
   for (uptr i = 0; i < rep_->threads.Size(); i++) {
     if ((u32)rep_->threads[i]->id == tctx->tid)
       return;
@@ -205,6 +208,8 @@
   rt->parent_tid = tctx->parent_tid;
   rt->stack = 0;
   rt->stack = SymbolizeStackId(tctx->creation_stack_id);
+  if (rt->stack)
+    rt->stack->suppressable = suppressable;
 }
 
 #ifndef TSAN_GO
@@ -251,9 +256,9 @@
 }
 #endif
 
-void ScopedReport::AddThread(int unique_tid) {
+void ScopedReport::AddThread(int unique_tid, bool suppressable) {
 #ifndef TSAN_GO
-  AddThread(FindThreadByUidLocked(unique_tid));
+  AddThread(FindThreadByUidLocked(unique_tid), suppressable);
 #endif
 }
 
@@ -275,7 +280,7 @@
   u64 uid = 0;
   u64 mid = id;
   uptr addr = SyncVar::SplitId(id, &uid);
-  SyncVar *s = ctx->synctab.GetIfExistsAndLock(addr, false);
+  SyncVar *s = ctx->metamap.GetIfExistsAndLock(addr);
   // Check that the mutex is still alive.
   // Another mutex can be created at the same address,
   // so check uid as well.
@@ -286,7 +291,7 @@
     AddDeadMutex(id);
   }
   if (s)
-    s->mtx.ReadUnlock();
+    s->mtx.Unlock();
   return mid;
 }
 
@@ -326,21 +331,26 @@
     return;
   }
   MBlock *b = 0;
-  if (allocator()->PointerIsMine((void*)addr)
-      && (b = user_mblock(0, (void*)addr))) {
-    ThreadContext *tctx = FindThreadByTidLocked(b->Tid());
+  Allocator *a = allocator();
+  if (a->PointerIsMine((void*)addr)) {
+    void *block_begin = a->GetBlockBegin((void*)addr);
+    if (block_begin)
+      b = ctx->metamap.GetBlock((uptr)block_begin);
+  }
+  if (b != 0) {
+    ThreadContext *tctx = FindThreadByTidLocked(b->tid);
     void *mem = internal_alloc(MBlockReportLoc, sizeof(ReportLocation));
     ReportLocation *loc = new(mem) ReportLocation();
     rep_->locs.PushBack(loc);
     loc->type = ReportLocationHeap;
     loc->addr = (uptr)allocator()->GetBlockBegin((void*)addr);
-    loc->size = b->Size();
-    loc->tid = tctx ? tctx->tid : b->Tid();
+    loc->size = b->siz;
+    loc->tid = tctx ? tctx->tid : b->tid;
     loc->name = 0;
     loc->file = 0;
     loc->line = 0;
     loc->stack = 0;
-    loc->stack = SymbolizeStackId(b->StackId());
+    loc->stack = SymbolizeStackId(b->stk);
     if (tctx)
       AddThread(tctx);
     return;
@@ -356,6 +366,7 @@
   }
   ReportLocation *loc = SymbolizeData(addr);
   if (loc) {
+    loc->suppressable = true;
     rep_->locs.PushBack(loc);
     return;
   }
@@ -495,25 +506,31 @@
   }
 }
 
-bool OutputReport(Context *ctx,
-                  const ScopedReport &srep,
-                  const ReportStack *suppress_stack1,
-                  const ReportStack *suppress_stack2,
-                  const ReportLocation *suppress_loc) {
+bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
   atomic_store(&ctx->last_symbolize_time_ns, NanoTime(), memory_order_relaxed);
   const ReportDesc *rep = srep.GetReport();
   Suppression *supp = 0;
-  uptr suppress_pc = IsSuppressed(rep->typ, suppress_stack1, &supp);
-  if (suppress_pc == 0)
-    suppress_pc = IsSuppressed(rep->typ, suppress_stack2, &supp);
-  if (suppress_pc == 0)
-    suppress_pc = IsSuppressed(rep->typ, suppress_loc, &supp);
+  uptr suppress_pc = 0;
+  for (uptr i = 0; suppress_pc == 0 && i < rep->mops.Size(); i++)
+    suppress_pc = IsSuppressed(rep->typ, rep->mops[i]->stack, &supp);
+  for (uptr i = 0; suppress_pc == 0 && i < rep->stacks.Size(); i++)
+    suppress_pc = IsSuppressed(rep->typ, rep->stacks[i], &supp);
+  for (uptr i = 0; suppress_pc == 0 && i < rep->threads.Size(); i++)
+    suppress_pc = IsSuppressed(rep->typ, rep->threads[i]->stack, &supp);
+  for (uptr i = 0; suppress_pc == 0 && i < rep->locs.Size(); i++)
+    suppress_pc = IsSuppressed(rep->typ, rep->locs[i], &supp);
   if (suppress_pc != 0) {
     FiredSuppression s = {srep.GetReport()->typ, suppress_pc, supp};
     ctx->fired_suppressions.push_back(s);
   }
-  if (OnReport(rep, suppress_pc != 0))
-    return false;
+  {
+    bool old_is_freeing = thr->is_freeing;
+    thr->is_freeing = false;
+    bool suppressed = OnReport(rep, suppress_pc != 0);
+    thr->is_freeing = old_is_freeing;
+    if (suppressed)
+      return false;
+  }
   PrintReport(rep);
   ctx->nreported++;
   if (flags()->halt_on_error)
@@ -611,6 +628,8 @@
 }
 
 void ReportRace(ThreadState *thr) {
+  CheckNoLocks(thr);
+
   // Symbolizer makes lots of intercepted calls. If we try to process them,
   // at best it will cause deadlocks on internal mutexes.
   ScopedIgnoreInterceptors ignore;
@@ -695,11 +714,7 @@
   }
 #endif
 
-  ReportLocation *suppress_loc = rep.GetReport()->locs.Size() ?
-                                 rep.GetReport()->locs[0] : 0;
-  if (!OutputReport(ctx, rep, rep.GetReport()->mops[0]->stack,
-                              rep.GetReport()->mops[1]->stack,
-                              suppress_loc))
+  if (!OutputReport(thr, rep))
     return;
 
   AddRacyStacks(thr, traces, addr_min, addr_max);
diff --git a/lib/tsan/rtl/tsan_rtl_thread.cc b/lib/tsan/rtl/tsan_rtl_thread.cc
index b2ac7bb..94bf754 100644
--- a/lib/tsan/rtl/tsan_rtl_thread.cc
+++ b/lib/tsan/rtl/tsan_rtl_thread.cc
@@ -134,6 +134,7 @@
     ctx->dd->DestroyPhysicalThread(thr->dd_pt);
     ctx->dd->DestroyLogicalThread(thr->dd_lt);
   }
+  ctx->metamap.OnThreadIdle(thr);
 #ifndef TSAN_GO
   AllocatorThreadFinish(thr);
 #endif
@@ -205,9 +206,9 @@
       MaybeReportThreadLeak, &leaks);
   for (uptr i = 0; i < leaks.Size(); i++) {
     ScopedReport rep(ReportTypeThreadLeak);
-    rep.AddThread(leaks[i].tctx);
+    rep.AddThread(leaks[i].tctx, true);
     rep.SetCount(leaks[i].count);
-    OutputReport(ctx, rep, rep.GetReport()->threads[0]->stack);
+    OutputReport(thr, rep);
   }
 #endif
 }
diff --git a/lib/tsan/rtl/tsan_stack_trace.cc b/lib/tsan/rtl/tsan_stack_trace.cc
new file mode 100644
index 0000000..a8374f4
--- /dev/null
+++ b/lib/tsan/rtl/tsan_stack_trace.cc
@@ -0,0 +1,112 @@
+//===-- tsan_stack_trace.cc -----------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+//#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_stack_trace.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+StackTrace::StackTrace()
+    : n_()
+    , s_()
+    , c_() {
+}
+
+StackTrace::StackTrace(uptr *buf, uptr cnt)
+    : n_()
+    , s_(buf)
+    , c_(cnt) {
+  CHECK_NE(buf, 0);
+  CHECK_NE(cnt, 0);
+}
+
+StackTrace::~StackTrace() {
+  Reset();
+}
+
+void StackTrace::Reset() {
+  if (s_ && !c_) {
+    CHECK_NE(n_, 0);
+    internal_free(s_);
+    s_ = 0;
+  }
+  n_ = 0;
+}
+
+void StackTrace::Init(const uptr *pcs, uptr cnt) {
+  Reset();
+  if (cnt == 0)
+    return;
+  if (c_) {
+    CHECK_NE(s_, 0);
+    CHECK_LE(cnt, c_);
+  } else {
+    s_ = (uptr*)internal_alloc(MBlockStackTrace, cnt * sizeof(s_[0]));
+  }
+  n_ = cnt;
+  internal_memcpy(s_, pcs, cnt * sizeof(s_[0]));
+}
+
+void StackTrace::ObtainCurrent(ThreadState *thr, uptr toppc) {
+  Reset();
+  n_ = thr->shadow_stack_pos - thr->shadow_stack;
+  if (n_ + !!toppc == 0)
+    return;
+  uptr start = 0;
+  if (c_) {
+    CHECK_NE(s_, 0);
+    if (n_ + !!toppc > c_) {
+      start = n_ - c_ + !!toppc;
+      n_ = c_ - !!toppc;
+    }
+  } else {
+    // Cap potentially huge stacks.
+    if (n_ + !!toppc > kTraceStackSize) {
+      start = n_ - kTraceStackSize + !!toppc;
+      n_ = kTraceStackSize - !!toppc;
+    }
+    s_ = (uptr*)internal_alloc(MBlockStackTrace,
+                               (n_ + !!toppc) * sizeof(s_[0]));
+  }
+  for (uptr i = 0; i < n_; i++)
+    s_[i] = thr->shadow_stack[start + i];
+  if (toppc) {
+    s_[n_] = toppc;
+    n_++;
+  }
+}
+
+void StackTrace::CopyFrom(const StackTrace& other) {
+  Reset();
+  Init(other.Begin(), other.Size());
+}
+
+bool StackTrace::IsEmpty() const {
+  return n_ == 0;
+}
+
+uptr StackTrace::Size() const {
+  return n_;
+}
+
+uptr StackTrace::Get(uptr i) const {
+  CHECK_LT(i, n_);
+  return s_[i];
+}
+
+const uptr *StackTrace::Begin() const {
+  return s_;
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_stack_trace.h b/lib/tsan/rtl/tsan_stack_trace.h
new file mode 100644
index 0000000..fe82f6e
--- /dev/null
+++ b/lib/tsan/rtl/tsan_stack_trace.h
@@ -0,0 +1,54 @@
+//===-- tsan_stack_trace.h --------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_STACK_TRACE_H
+#define TSAN_STACK_TRACE_H
+
+//#include "sanitizer_common/sanitizer_atomic.h"
+//#include "sanitizer_common/sanitizer_common.h"
+//#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
+#include "tsan_defs.h"
+//#include "tsan_clock.h"
+//#include "tsan_mutex.h"
+//#include "tsan_dense_alloc.h"
+
+namespace __tsan {
+
+class StackTrace {
+ public:
+  StackTrace();
+  // Initializes the object in "static mode":
+  // in this mode it never calls malloc/free but uses the provided buffer.
+  StackTrace(uptr *buf, uptr cnt);
+  ~StackTrace();
+  void Reset();
+
+  void Init(const uptr *pcs, uptr cnt);
+  void ObtainCurrent(ThreadState *thr, uptr toppc);
+  bool IsEmpty() const;
+  uptr Size() const;
+  uptr Get(uptr i) const;
+  const uptr *Begin() const;
+  void CopyFrom(const StackTrace& other);
+
+ private:
+  uptr n_;
+  uptr *s_;
+  const uptr c_;
+
+  StackTrace(const StackTrace&);
+  void operator = (const StackTrace&);
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_STACK_TRACE_H
diff --git a/lib/tsan/rtl/tsan_stat.cc b/lib/tsan/rtl/tsan_stat.cc
index b42348a..350a2ba 100644
--- a/lib/tsan/rtl/tsan_stat.cc
+++ b/lib/tsan/rtl/tsan_stat.cc
@@ -37,6 +37,7 @@
   name[StatMop4]                         = "            size 4                ";
   name[StatMop8]                         = "            size 8                ";
   name[StatMopSame]                      = "  Including same                  ";
+  name[StatMopIgnored]                   = "  Including ignored               ";
   name[StatMopRange]                     = "  Including range                 ";
   name[StatMopRodata]                    = "  Including .rodata               ";
   name[StatMopRangeRodata]               = "  Including .rodata range         ";
diff --git a/lib/tsan/rtl/tsan_stat.h b/lib/tsan/rtl/tsan_stat.h
index 8cdf146..0bd949e 100644
--- a/lib/tsan/rtl/tsan_stat.h
+++ b/lib/tsan/rtl/tsan_stat.h
@@ -26,6 +26,7 @@
   StatMop4,
   StatMop8,
   StatMopSame,
+  StatMopIgnored,
   StatMopRange,
   StatMopRodata,
   StatMopRangeRodata,
diff --git a/lib/tsan/rtl/tsan_suppressions.cc b/lib/tsan/rtl/tsan_suppressions.cc
index 0ac2526..0670396 100644
--- a/lib/tsan/rtl/tsan_suppressions.cc
+++ b/lib/tsan/rtl/tsan_suppressions.cc
@@ -123,7 +123,8 @@
 
 uptr IsSuppressed(ReportType typ, const ReportStack *stack, Suppression **sp) {
   CHECK(g_ctx);
-  if (!g_ctx->SuppressionCount() || stack == 0) return 0;
+  if (!g_ctx->SuppressionCount() || stack == 0 || !stack->suppressable)
+    return 0;
   SuppressionType stype = conv(typ);
   if (stype == SuppressionNone)
     return 0;
@@ -144,7 +145,7 @@
 uptr IsSuppressed(ReportType typ, const ReportLocation *loc, Suppression **sp) {
   CHECK(g_ctx);
   if (!g_ctx->SuppressionCount() || loc == 0 ||
-      loc->type != ReportLocationGlobal)
+      loc->type != ReportLocationGlobal || !loc->suppressable)
     return 0;
   SuppressionType stype = conv(typ);
   if (stype == SuppressionNone)
diff --git a/lib/tsan/rtl/tsan_sync.cc b/lib/tsan/rtl/tsan_sync.cc
index 5d71f9f..10f52b4 100644
--- a/lib/tsan/rtl/tsan_sync.cc
+++ b/lib/tsan/rtl/tsan_sync.cc
@@ -19,293 +19,202 @@
 
 void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);
 
-SyncVar::SyncVar(uptr addr, u64 uid)
-  : mtx(MutexTypeSyncVar, StatMtxSyncVar)
-  , addr(addr)
-  , uid(uid)
-  , creation_stack_id()
-  , owner_tid(kInvalidTid)
-  , last_lock()
-  , recursion()
-  , is_rw()
-  , is_recursive()
-  , is_broken()
-  , is_linker_init() {
+SyncVar::SyncVar()
+    : mtx(MutexTypeSyncVar, StatMtxSyncVar) {
+  Reset();
 }
 
-SyncTab::Part::Part()
-  : mtx(MutexTypeSyncTab, StatMtxSyncTab)
-  , val() {
+void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid) {
+  this->addr = addr;
+  this->uid = uid;
+  this->next = 0;
+
+  creation_stack_id = 0;
+  if (kCppMode)  // Go does not use them
+    creation_stack_id = CurrentStackId(thr, pc);
+  if (flags()->detect_deadlocks)
+    DDMutexInit(thr, pc, this);
 }
 
-SyncTab::SyncTab() {
+void SyncVar::Reset() {
+  uid = 0;
+  creation_stack_id = 0;
+  owner_tid = kInvalidTid;
+  last_lock = 0;
+  recursion = 0;
+  is_rw = 0;
+  is_recursive = 0;
+  is_broken = 0;
+  is_linker_init = 0;
+
+  clock.Zero();
+  read_clock.Reset();
 }
 
-SyncTab::~SyncTab() {
-  for (int i = 0; i < kPartCount; i++) {
-    while (tab_[i].val) {
-      SyncVar *tmp = tab_[i].val;
-      tab_[i].val = tmp->next;
-      DestroyAndFree(tmp);
+MetaMap::MetaMap() {
+  atomic_store(&uid_gen_, 0, memory_order_relaxed);
+}
+
+void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
+  u32 idx = block_alloc_.Alloc(&thr->block_cache);
+  MBlock *b = block_alloc_.Map(idx);
+  b->siz = sz;
+  b->tid = thr->tid;
+  b->stk = CurrentStackId(thr, pc);
+  u32 *meta = MemToMeta(p);
+  DCHECK_EQ(*meta, 0);
+  *meta = idx | kFlagBlock;
+}
+
+uptr MetaMap::FreeBlock(ThreadState *thr, uptr pc, uptr p) {
+  MBlock* b = GetBlock(p);
+  if (b == 0)
+    return 0;
+  uptr sz = RoundUpTo(b->siz, kMetaShadowCell);
+  FreeRange(thr, pc, p, sz);
+  return sz;
+}
+
+void MetaMap::FreeRange(ThreadState *thr, uptr pc, uptr p, uptr sz) {
+  u32 *meta = MemToMeta(p);
+  u32 *end = MemToMeta(p + sz);
+  if (end == meta)
+    end++;
+  for (; meta < end; meta++) {
+    u32 idx = *meta;
+    *meta = 0;
+    for (;;) {
+      if (idx == 0)
+        break;
+      if (idx & kFlagBlock) {
+        block_alloc_.Free(&thr->block_cache, idx & ~kFlagMask);
+        break;
+      } else if (idx & kFlagSync) {
+        DCHECK(idx & kFlagSync);
+        SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
+        u32 next = s->next;
+        s->Reset();
+        sync_alloc_.Free(&thr->sync_cache, idx & ~kFlagMask);
+        idx = next;
+      } else {
+        CHECK(0);
+      }
     }
   }
 }
 
-SyncVar* SyncTab::GetOrCreateAndLock(ThreadState *thr, uptr pc,
-                                     uptr addr, bool write_lock) {
+MBlock* MetaMap::GetBlock(uptr p) {
+  u32 *meta = MemToMeta(p);
+  u32 idx = *meta;
+  for (;;) {
+    if (idx == 0)
+      return 0;
+    if (idx & kFlagBlock)
+      return block_alloc_.Map(idx & ~kFlagMask);
+    DCHECK(idx & kFlagSync);
+    SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
+    idx = s->next;
+  }
+}
+
+SyncVar* MetaMap::GetOrCreateAndLock(ThreadState *thr, uptr pc,
+                              uptr addr, bool write_lock) {
   return GetAndLock(thr, pc, addr, write_lock, true);
 }
 
-SyncVar* SyncTab::GetIfExistsAndLock(uptr addr, bool write_lock) {
-  return GetAndLock(0, 0, addr, write_lock, false);
+SyncVar* MetaMap::GetIfExistsAndLock(uptr addr) {
+  return GetAndLock(0, 0, addr, true, false);
 }
 
-SyncVar* SyncTab::Create(ThreadState *thr, uptr pc, uptr addr) {
-  StatInc(thr, StatSyncCreated);
-  void *mem = internal_alloc(MBlockSync, sizeof(SyncVar));
-  const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
-  SyncVar *res = new(mem) SyncVar(addr, uid);
-  res->creation_stack_id = 0;
-  if (!kGoMode)  // Go does not use them
-    res->creation_stack_id = CurrentStackId(thr, pc);
-  if (flags()->detect_deadlocks)
-    DDMutexInit(thr, pc, res);
-  return res;
-}
-
-SyncVar* SyncTab::GetAndLock(ThreadState *thr, uptr pc,
+SyncVar* MetaMap::GetAndLock(ThreadState *thr, uptr pc,
                              uptr addr, bool write_lock, bool create) {
-#ifndef TSAN_GO
-  {  // NOLINT
-    SyncVar *res = GetJavaSync(thr, pc, addr, write_lock, create);
-    if (res)
-      return res;
-  }
-
-  // Here we ask only PrimaryAllocator, because
-  // SecondaryAllocator::PointerIsMine() is slow and we have fallback on
-  // the hashmap anyway.
-  if (PrimaryAllocator::PointerIsMine((void*)addr)) {
-    MBlock *b = user_mblock(thr, (void*)addr);
-    CHECK_NE(b, 0);
-    MBlock::ScopedLock l(b);
-    SyncVar *res = 0;
-    for (res = b->ListHead(); res; res = res->next) {
-      if (res->addr == addr)
+  u32 *meta = MemToMeta(addr);
+  u32 idx0 = *meta;
+  u32 myidx = 0;
+  SyncVar *mys = 0;
+  for (;;) {
+    u32 idx = idx0;
+    for (;;) {
+      if (idx == 0)
         break;
-    }
-    if (res == 0) {
-      if (!create)
-        return 0;
-      res = Create(thr, pc, addr);
-      b->ListPush(res);
-    }
-    if (write_lock)
-      res->mtx.Lock();
-    else
-      res->mtx.ReadLock();
-    return res;
-  }
-#endif
-
-  Part *p = &tab_[PartIdx(addr)];
-  {
-    ReadLock l(&p->mtx);
-    for (SyncVar *res = p->val; res; res = res->next) {
-      if (res->addr == addr) {
+      if (idx & kFlagBlock)
+        break;
+      DCHECK(idx & kFlagSync);
+      SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
+      if (s->addr == addr) {
+        if (myidx != 0) {
+          mys->Reset();
+          sync_alloc_.Free(&thr->sync_cache, myidx);
+        }
         if (write_lock)
-          res->mtx.Lock();
+          s->mtx.Lock();
         else
-          res->mtx.ReadLock();
-        return res;
+          s->mtx.ReadLock();
+        return s;
       }
+      idx = s->next;
+    }
+    if (!create)
+      return 0;
+    if (*meta != idx0) {
+      idx0 = *meta;
+      continue;
+    }
+
+    if (myidx == 0) {
+      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
+      myidx = sync_alloc_.Alloc(&thr->sync_cache);
+      mys = sync_alloc_.Map(myidx);
+      mys->Init(thr, pc, addr, uid);
+    }
+    mys->next = idx0;
+    if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
+        myidx | kFlagSync, memory_order_release)) {
+      if (write_lock)
+        mys->mtx.Lock();
+      else
+        mys->mtx.ReadLock();
+      return mys;
     }
   }
-  if (!create)
-    return 0;
-  {
-    Lock l(&p->mtx);
-    SyncVar *res = p->val;
-    for (; res; res = res->next) {
-      if (res->addr == addr)
+}
+
+void MetaMap::MoveMemory(uptr src, uptr dst, uptr sz) {
+  // src and dst can overlap,
+  // there are no concurrent accesses to the regions (e.g. stop-the-world).
+  CHECK_NE(src, dst);
+  CHECK_NE(sz, 0);
+  uptr diff = dst - src;
+  u32 *src_meta = MemToMeta(src);
+  u32 *dst_meta = MemToMeta(dst);
+  u32 *src_meta_end = MemToMeta(src + sz);
+  uptr inc = 1;
+  if (dst > src) {
+    src_meta = MemToMeta(src + sz) - 1;
+    dst_meta = MemToMeta(dst + sz) - 1;
+    src_meta_end = MemToMeta(src) - 1;
+    inc = -1;
+  }
+  for (; src_meta != src_meta_end; src_meta += inc, dst_meta += inc) {
+    CHECK_EQ(*dst_meta, 0);
+    u32 idx = *src_meta;
+    *src_meta = 0;
+    *dst_meta = idx;
+    // Patch the addresses in sync objects.
+    while (idx != 0) {
+      if (idx & kFlagBlock)
         break;
-    }
-    if (res == 0) {
-      res = Create(thr, pc, addr);
-      res->next = p->val;
-      p->val = res;
-    }
-    if (write_lock)
-      res->mtx.Lock();
-    else
-      res->mtx.ReadLock();
-    return res;
-  }
-}
-
-SyncVar* SyncTab::GetAndRemove(ThreadState *thr, uptr pc, uptr addr) {
-#ifndef TSAN_GO
-  {  // NOLINT
-    SyncVar *res = GetAndRemoveJavaSync(thr, pc, addr);
-    if (res)
-      return res;
-  }
-  if (PrimaryAllocator::PointerIsMine((void*)addr)) {
-    MBlock *b = user_mblock(thr, (void*)addr);
-    CHECK_NE(b, 0);
-    SyncVar *res = 0;
-    {
-      MBlock::ScopedLock l(b);
-      res = b->ListHead();
-      if (res) {
-        if (res->addr == addr) {
-          if (res->is_linker_init)
-            return 0;
-          b->ListPop();
-        } else {
-          SyncVar **prev = &res->next;
-          res = *prev;
-          while (res) {
-            if (res->addr == addr) {
-              if (res->is_linker_init)
-                return 0;
-              *prev = res->next;
-              break;
-            }
-            prev = &res->next;
-            res = *prev;
-          }
-        }
-        if (res) {
-          StatInc(thr, StatSyncDestroyed);
-          res->mtx.Lock();
-          res->mtx.Unlock();
-        }
-      }
-    }
-    return res;
-  }
-#endif
-
-  Part *p = &tab_[PartIdx(addr)];
-  SyncVar *res = 0;
-  {
-    Lock l(&p->mtx);
-    SyncVar **prev = &p->val;
-    res = *prev;
-    while (res) {
-      if (res->addr == addr) {
-        if (res->is_linker_init)
-          return 0;
-        *prev = res->next;
-        break;
-      }
-      prev = &res->next;
-      res = *prev;
+      CHECK(idx & kFlagSync);
+      SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
+      s->addr += diff;
+      idx = s->next;
     }
   }
-  if (res) {
-    StatInc(thr, StatSyncDestroyed);
-    res->mtx.Lock();
-    res->mtx.Unlock();
-  }
-  return res;
 }
 
-int SyncTab::PartIdx(uptr addr) {
-  return (addr >> 3) % kPartCount;
-}
-
-StackTrace::StackTrace()
-    : n_()
-    , s_()
-    , c_() {
-}
-
-StackTrace::StackTrace(uptr *buf, uptr cnt)
-    : n_()
-    , s_(buf)
-    , c_(cnt) {
-  CHECK_NE(buf, 0);
-  CHECK_NE(cnt, 0);
-}
-
-StackTrace::~StackTrace() {
-  Reset();
-}
-
-void StackTrace::Reset() {
-  if (s_ && !c_) {
-    CHECK_NE(n_, 0);
-    internal_free(s_);
-    s_ = 0;
-  }
-  n_ = 0;
-}
-
-void StackTrace::Init(const uptr *pcs, uptr cnt) {
-  Reset();
-  if (cnt == 0)
-    return;
-  if (c_) {
-    CHECK_NE(s_, 0);
-    CHECK_LE(cnt, c_);
-  } else {
-    s_ = (uptr*)internal_alloc(MBlockStackTrace, cnt * sizeof(s_[0]));
-  }
-  n_ = cnt;
-  internal_memcpy(s_, pcs, cnt * sizeof(s_[0]));
-}
-
-void StackTrace::ObtainCurrent(ThreadState *thr, uptr toppc) {
-  Reset();
-  n_ = thr->shadow_stack_pos - thr->shadow_stack;
-  if (n_ + !!toppc == 0)
-    return;
-  uptr start = 0;
-  if (c_) {
-    CHECK_NE(s_, 0);
-    if (n_ + !!toppc > c_) {
-      start = n_ - c_ + !!toppc;
-      n_ = c_ - !!toppc;
-    }
-  } else {
-    // Cap potentially huge stacks.
-    if (n_ + !!toppc > kTraceStackSize) {
-      start = n_ - kTraceStackSize + !!toppc;
-      n_ = kTraceStackSize - !!toppc;
-    }
-    s_ = (uptr*)internal_alloc(MBlockStackTrace,
-                               (n_ + !!toppc) * sizeof(s_[0]));
-  }
-  for (uptr i = 0; i < n_; i++)
-    s_[i] = thr->shadow_stack[start + i];
-  if (toppc) {
-    s_[n_] = toppc;
-    n_++;
-  }
-}
-
-void StackTrace::CopyFrom(const StackTrace& other) {
-  Reset();
-  Init(other.Begin(), other.Size());
-}
-
-bool StackTrace::IsEmpty() const {
-  return n_ == 0;
-}
-
-uptr StackTrace::Size() const {
-  return n_;
-}
-
-uptr StackTrace::Get(uptr i) const {
-  CHECK_LT(i, n_);
-  return s_[i];
-}
-
-const uptr *StackTrace::Begin() const {
-  return s_;
+void MetaMap::OnThreadIdle(ThreadState *thr) {
+  block_alloc_.FlushCache(&thr->block_cache);
+  sync_alloc_.FlushCache(&thr->sync_cache);
 }
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_sync.h b/lib/tsan/rtl/tsan_sync.h
index ed0ac59..7c8682f 100644
--- a/lib/tsan/rtl/tsan_sync.h
+++ b/lib/tsan/rtl/tsan_sync.h
@@ -16,46 +16,21 @@
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
-#include "tsan_clock.h"
 #include "tsan_defs.h"
+#include "tsan_clock.h"
 #include "tsan_mutex.h"
+#include "tsan_dense_alloc.h"
 
 namespace __tsan {
 
-class StackTrace {
- public:
-  StackTrace();
-  // Initialized the object in "static mode",
-  // in this mode it never calls malloc/free but uses the provided buffer.
-  StackTrace(uptr *buf, uptr cnt);
-  ~StackTrace();
-  void Reset();
-
-  void Init(const uptr *pcs, uptr cnt);
-  void ObtainCurrent(ThreadState *thr, uptr toppc);
-  bool IsEmpty() const;
-  uptr Size() const;
-  uptr Get(uptr i) const;
-  const uptr *Begin() const;
-  void CopyFrom(const StackTrace& other);
-
- private:
-  uptr n_;
-  uptr *s_;
-  const uptr c_;
-
-  StackTrace(const StackTrace&);
-  void operator = (const StackTrace&);
-};
-
 struct SyncVar {
-  explicit SyncVar(uptr addr, u64 uid);
+  SyncVar();
 
   static const int kInvalidTid = -1;
 
+  uptr addr;  // overwritten by DenseSlabAlloc freelist
   Mutex mtx;
-  uptr addr;
-  const u64 uid;  // Globally unique id.
+  u64 uid;  // Globally unique id.
   u32 creation_stack_id;
   int owner_tid;  // Set only by exclusive owners.
   u64 last_lock;
@@ -64,13 +39,16 @@
   bool is_recursive;
   bool is_broken;
   bool is_linker_init;
-  SyncVar *next;  // In SyncTab hashtable.
+  u32 next;  // in MetaMap
   DDMutex dd;
   SyncClock read_clock;  // Used for rw mutexes only.
   // The clock is placed last, so that it is situated on a different cache line
   // with the mtx. This reduces contention for hot sync objects.
   SyncClock clock;
 
+  void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid);
+  void Reset();
+
   u64 GetId() const {
     // 47 lsb is addr, then 14 bits is low part of uid, then 3 zero bits.
     return GetLsb((u64)addr | (uid << 47), 61);
@@ -85,40 +63,39 @@
   }
 };
 
-class SyncTab {
+/* MetaMap maps arbitrary user pointers onto various descriptors.
+   Currently it maps pointers to heap block descriptors and sync var descs.
+   It uses 1/2 direct shadow, see tsan_platform.h.
+*/
+class MetaMap {
  public:
-  SyncTab();
-  ~SyncTab();
+  MetaMap();
+
+  void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz);
+  uptr FreeBlock(ThreadState *thr, uptr pc, uptr p);
+  void FreeRange(ThreadState *thr, uptr pc, uptr p, uptr sz);
+  MBlock* GetBlock(uptr p);
 
   SyncVar* GetOrCreateAndLock(ThreadState *thr, uptr pc,
                               uptr addr, bool write_lock);
-  SyncVar* GetIfExistsAndLock(uptr addr, bool write_lock);
+  SyncVar* GetIfExistsAndLock(uptr addr);
 
-  // If the SyncVar does not exist, returns 0.
-  SyncVar* GetAndRemove(ThreadState *thr, uptr pc, uptr addr);
+  void MoveMemory(uptr src, uptr dst, uptr sz);
 
-  SyncVar* Create(ThreadState *thr, uptr pc, uptr addr);
+  void OnThreadIdle(ThreadState *thr);
 
  private:
-  struct Part {
-    Mutex mtx;
-    SyncVar *val;
-    char pad[kCacheLineSize - sizeof(Mutex) - sizeof(SyncVar*)];  // NOLINT
-    Part();
-  };
-
-  // FIXME: Implement something more sane.
-  static const int kPartCount = 1009;
-  Part tab_[kPartCount];
+  static const u32 kFlagMask  = 3 << 30;
+  static const u32 kFlagBlock = 1 << 30;
+  static const u32 kFlagSync  = 2 << 30;
+  typedef DenseSlabAlloc<MBlock, 1<<16, 1<<12> BlockAlloc;
+  typedef DenseSlabAlloc<SyncVar, 1<<16, 1<<10> SyncAlloc;
+  BlockAlloc block_alloc_;
+  SyncAlloc sync_alloc_;
   atomic_uint64_t uid_gen_;
 
-  int PartIdx(uptr addr);
-
-  SyncVar* GetAndLock(ThreadState *thr, uptr pc,
-                      uptr addr, bool write_lock, bool create);
-
-  SyncTab(const SyncTab&);  // Not implemented.
-  void operator = (const SyncTab&);  // Not implemented.
+  SyncVar* GetAndLock(ThreadState *thr, uptr pc, uptr addr, bool write_lock,
+                      bool create);
 };
 
 }  // namespace __tsan
diff --git a/lib/tsan/rtl/tsan_trace.h b/lib/tsan/rtl/tsan_trace.h
index 5ed0356..686160c 100644
--- a/lib/tsan/rtl/tsan_trace.h
+++ b/lib/tsan/rtl/tsan_trace.h
@@ -15,7 +15,7 @@
 
 #include "tsan_defs.h"
 #include "tsan_mutex.h"
-#include "tsan_sync.h"
+#include "tsan_stack_trace.h"
 #include "tsan_mutexset.h"
 
 namespace __tsan {
diff --git a/lib/tsan/rtl/tsan_update_shadow_word_inl.h b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
index a11c9bc..c80e0a8 100644
--- a/lib/tsan/rtl/tsan_update_shadow_word_inl.h
+++ b/lib/tsan/rtl/tsan_update_shadow_word_inl.h
@@ -16,8 +16,7 @@
 do {
   StatInc(thr, StatShadowProcessed);
   const unsigned kAccessSize = 1 << kAccessSizeLog;
-  unsigned off = cur.ComputeSearchOffset();
-  u64 *sp = &shadow_mem[(idx + off) % kShadowCnt];
+  u64 *sp = &shadow_mem[idx];
   old = LoadShadow(sp);
   if (old.IsZero()) {
     StatInc(thr, StatShadowZero);
@@ -33,16 +32,6 @@
     // same thread?
     if (Shadow::TidsAreEqual(old, cur)) {
       StatInc(thr, StatShadowSameThread);
-      if (OldIsInSameSynchEpoch(old, thr)) {
-        if (old.IsRWNotWeaker(kAccessIsWrite, kIsAtomic)) {
-          // found a slot that holds effectively the same info
-          // (that is, same tid, same sync epoch and same size)
-          StatInc(thr, StatMopSame);
-          return;
-        }
-        StoreIfNotYetStored(sp, &store_word);
-        break;
-      }
       if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))
         StoreIfNotYetStored(sp, &store_word);
       break;
diff --git a/lib/tsan/rtl/tsan_vector.h b/lib/tsan/rtl/tsan_vector.h
index ae84522..a7fb3fa 100644
--- a/lib/tsan/rtl/tsan_vector.h
+++ b/lib/tsan/rtl/tsan_vector.h
@@ -78,6 +78,10 @@
   }
 
   void Resize(uptr size) {
+    if (size == 0) {
+      end_ = begin_;
+      return;
+    }
     uptr old_size = Size();
     EnsureSize(size);
     if (old_size < size) {
@@ -100,7 +104,7 @@
       return;
     }
     uptr cap0 = last_ - begin_;
-    uptr cap = 2 * cap0;
+    uptr cap = cap0 * 5 / 4;  // 25% growth
     if (cap == 0)
       cap = 16;
     if (cap < size)
diff --git a/lib/tsan/tests/unit/tsan_dense_alloc_test.cc b/lib/tsan/tests/unit/tsan_dense_alloc_test.cc
new file mode 100644
index 0000000..fc9e4cb
--- /dev/null
+++ b/lib/tsan/tests/unit/tsan_dense_alloc_test.cc
@@ -0,0 +1,55 @@
+//===-- tsan_dense_alloc_test.cc ------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_dense_alloc.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "gtest/gtest.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <map>
+
+namespace __tsan {
+
+TEST(DenseSlabAlloc, Basic) {
+  typedef DenseSlabAlloc<int, 128, 128> Alloc;
+  typedef Alloc::Cache Cache;
+  typedef Alloc::IndexT IndexT;
+  const int N = 1000;
+
+  Alloc alloc;
+  Cache cache;
+  alloc.InitCache(&cache);
+
+  IndexT blocks[N];
+  for (int ntry = 0; ntry < 3; ntry++) {
+    for (int i = 0; i < N; i++) {
+      IndexT idx = alloc.Alloc(&cache);
+      blocks[i] = idx;
+      EXPECT_NE(idx, 0);
+      int *v = alloc.Map(idx);
+      *v = i;
+    }
+
+    for (int i = 0; i < N; i++) {
+      IndexT idx = blocks[i];
+      int *v = alloc.Map(idx);
+      EXPECT_EQ(*v, i);
+      alloc.Free(&cache, idx);
+    }
+
+    alloc.FlushCache(&cache);
+  }
+}
+
+}  // namespace __tsan
diff --git a/lib/tsan/tests/unit/tsan_mman_test.cc b/lib/tsan/tests/unit/tsan_mman_test.cc
index 5e39bea..0c4a8ff 100644
--- a/lib/tsan/tests/unit/tsan_mman_test.cc
+++ b/lib/tsan/tests/unit/tsan_mman_test.cc
@@ -11,20 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 #include <limits>
+#include <sanitizer/allocator_interface.h>
 #include "tsan_mman.h"
 #include "tsan_rtl.h"
 #include "gtest/gtest.h"
 
-extern "C" {
-uptr __tsan_get_current_allocated_bytes();
-uptr __tsan_get_heap_size();
-uptr __tsan_get_free_bytes();
-uptr __tsan_get_unmapped_bytes();
-uptr __tsan_get_estimated_allocated_size(uptr size);
-bool __tsan_get_ownership(void *p);
-uptr __tsan_get_allocated_size(void *p);
-}
-
 namespace __tsan {
 
 TEST(Mman, Internal) {
@@ -51,20 +42,8 @@
   char *p2 = (char*)user_alloc(thr, pc, 20);
   EXPECT_NE(p2, (char*)0);
   EXPECT_NE(p2, p);
-  MBlock *b = user_mblock(thr, p);
-  EXPECT_NE(b, (MBlock*)0);
-  EXPECT_EQ(b->Size(), (uptr)10);
-  MBlock *b2 = user_mblock(thr, p2);
-  EXPECT_NE(b2, (MBlock*)0);
-  EXPECT_EQ(b2->Size(), (uptr)20);
-  for (int i = 0; i < 10; i++) {
-    p[i] = 42;
-    EXPECT_EQ(b, user_mblock(thr, p + i));
-  }
-  for (int i = 0; i < 20; i++) {
-    ((char*)p2)[i] = 42;
-    EXPECT_EQ(b2, user_mblock(thr, p2 + i));
-  }
+  EXPECT_EQ(10U, user_alloc_usable_size(p));
+  EXPECT_EQ(20U, user_alloc_usable_size(p2));
   user_free(thr, pc, p);
   user_free(thr, pc, p2);
 }
@@ -119,43 +98,49 @@
   uptr pc = 0;
   char *p = (char*)user_alloc(thr, pc, 10);
   char *p2 = (char*)user_alloc(thr, pc, 20);
-  EXPECT_EQ(0U, user_alloc_usable_size(thr, pc, NULL));
-  EXPECT_EQ(10U, user_alloc_usable_size(thr, pc, p));
-  EXPECT_EQ(20U, user_alloc_usable_size(thr, pc, p2));
+  EXPECT_EQ(0U, user_alloc_usable_size(NULL));
+  EXPECT_EQ(10U, user_alloc_usable_size(p));
+  EXPECT_EQ(20U, user_alloc_usable_size(p2));
   user_free(thr, pc, p);
   user_free(thr, pc, p2);
+  EXPECT_EQ(0U, user_alloc_usable_size((void*)0x123));
 }
 
 TEST(Mman, Stats) {
   ThreadState *thr = cur_thread();
 
-  uptr alloc0 = __tsan_get_current_allocated_bytes();
-  uptr heap0 = __tsan_get_heap_size();
-  uptr free0 = __tsan_get_free_bytes();
-  uptr unmapped0 = __tsan_get_unmapped_bytes();
+  uptr alloc0 = __sanitizer_get_current_allocated_bytes();
+  uptr heap0 = __sanitizer_get_heap_size();
+  uptr free0 = __sanitizer_get_free_bytes();
+  uptr unmapped0 = __sanitizer_get_unmapped_bytes();
 
-  EXPECT_EQ(__tsan_get_estimated_allocated_size(10), (uptr)10);
-  EXPECT_EQ(__tsan_get_estimated_allocated_size(20), (uptr)20);
-  EXPECT_EQ(__tsan_get_estimated_allocated_size(100), (uptr)100);
+  EXPECT_EQ(10U, __sanitizer_get_estimated_allocated_size(10));
+  EXPECT_EQ(20U, __sanitizer_get_estimated_allocated_size(20));
+  EXPECT_EQ(100U, __sanitizer_get_estimated_allocated_size(100));
 
   char *p = (char*)user_alloc(thr, 0, 10);
-  EXPECT_EQ(__tsan_get_ownership(p), true);
-  EXPECT_EQ(__tsan_get_allocated_size(p), (uptr)10);
+  EXPECT_TRUE(__sanitizer_get_ownership(p));
+  EXPECT_EQ(10U, __sanitizer_get_allocated_size(p));
 
-  EXPECT_EQ(__tsan_get_current_allocated_bytes(), alloc0 + 16);
-  EXPECT_GE(__tsan_get_heap_size(), heap0);
-  EXPECT_EQ(__tsan_get_free_bytes(), free0);
-  EXPECT_EQ(__tsan_get_unmapped_bytes(), unmapped0);
+  EXPECT_EQ(alloc0 + 16, __sanitizer_get_current_allocated_bytes());
+  EXPECT_GE(__sanitizer_get_heap_size(), heap0);
+  EXPECT_EQ(free0, __sanitizer_get_free_bytes());
+  EXPECT_EQ(unmapped0, __sanitizer_get_unmapped_bytes());
 
   user_free(thr, 0, p);
 
-  EXPECT_EQ(__tsan_get_current_allocated_bytes(), alloc0);
-  EXPECT_GE(__tsan_get_heap_size(), heap0);
-  EXPECT_EQ(__tsan_get_free_bytes(), free0);
-  EXPECT_EQ(__tsan_get_unmapped_bytes(), unmapped0);
+  EXPECT_EQ(alloc0, __sanitizer_get_current_allocated_bytes());
+  EXPECT_GE(__sanitizer_get_heap_size(), heap0);
+  EXPECT_EQ(free0, __sanitizer_get_free_bytes());
+  EXPECT_EQ(unmapped0, __sanitizer_get_unmapped_bytes());
 }
 
 TEST(Mman, CallocOverflow) {
+#if TSAN_DEBUG
+  // EXPECT_DEATH clones a thread with a 4K stack,
+  // which is overflowed by tsan memory access functions in debug mode.
+  return;
+#endif
   size_t kArraySize = 4096;
   volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
   volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;
diff --git a/lib/tsan/tests/unit/tsan_sync_test.cc b/lib/tsan/tests/unit/tsan_sync_test.cc
index 1cfcf99..6f36c64 100644
--- a/lib/tsan/tests/unit/tsan_sync_test.cc
+++ b/lib/tsan/tests/unit/tsan_sync_test.cc
@@ -12,53 +12,112 @@
 //===----------------------------------------------------------------------===//
 #include "tsan_sync.h"
 #include "tsan_rtl.h"
-#include "tsan_mman.h"
 #include "gtest/gtest.h"
 
-#include <stdlib.h>
-#include <stdint.h>
-#include <map>
-
 namespace __tsan {
 
-TEST(Sync, Table) {
-  const uintptr_t kIters = 512*1024;
-  const uintptr_t kRange = 10000;
-
+TEST(MetaMap, Basic) {
   ThreadState *thr = cur_thread();
-  uptr pc = 0;
+  MetaMap *m = &ctx->metamap;
+  u64 block[1] = {};  // fake malloc block
+  m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
+  MBlock *mb = m->GetBlock((uptr)&block[0]);
+  EXPECT_NE(mb, (MBlock*)0);
+  EXPECT_EQ(mb->siz, 1 * sizeof(u64));
+  EXPECT_EQ(mb->tid, thr->tid);
+  uptr sz = m->FreeBlock(thr, 0, (uptr)&block[0]);
+  EXPECT_EQ(sz, 1 * sizeof(u64));
+  mb = m->GetBlock((uptr)&block[0]);
+  EXPECT_EQ(mb, (MBlock*)0);
+}
 
-  SyncTab tab;
-  SyncVar *golden[kRange] = {};
-  unsigned seed = 0;
-  for (uintptr_t i = 0; i < kIters; i++) {
-    uintptr_t addr = rand_r(&seed) % (kRange - 1) + 1;
-    if (rand_r(&seed) % 2) {
-      // Get or add.
-      SyncVar *v = tab.GetOrCreateAndLock(thr, pc, addr, true);
-      EXPECT_TRUE(golden[addr] == 0 || golden[addr] == v);
-      EXPECT_EQ(v->addr, addr);
-      golden[addr] = v;
-      v->mtx.Unlock();
-    } else {
-      // Remove.
-      SyncVar *v = tab.GetAndRemove(thr, pc, addr);
-      EXPECT_EQ(golden[addr], v);
-      if (v) {
-        EXPECT_EQ(v->addr, addr);
-        golden[addr] = 0;
-        DestroyAndFree(v);
-      }
-    }
-  }
-  for (uintptr_t addr = 0; addr < kRange; addr++) {
-    if (golden[addr] == 0)
-      continue;
-    SyncVar *v = tab.GetAndRemove(thr, pc, addr);
-    EXPECT_EQ(v, golden[addr]);
-    EXPECT_EQ(v->addr, addr);
-    DestroyAndFree(v);
-  }
+TEST(MetaMap, FreeRange) {
+  ThreadState *thr = cur_thread();
+  MetaMap *m = &ctx->metamap;
+  u64 block[4] = {};  // fake malloc block
+  m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
+  m->AllocBlock(thr, 0, (uptr)&block[1], 3 * sizeof(u64));
+  MBlock *mb1 = m->GetBlock((uptr)&block[0]);
+  EXPECT_EQ(mb1->siz, 1 * sizeof(u64));
+  MBlock *mb2 = m->GetBlock((uptr)&block[1]);
+  EXPECT_EQ(mb2->siz, 3 * sizeof(u64));
+  m->FreeRange(thr, 0, (uptr)&block[0], 4 * sizeof(u64));
+  mb1 = m->GetBlock((uptr)&block[0]);
+  EXPECT_EQ(mb1, (MBlock*)0);
+  mb2 = m->GetBlock((uptr)&block[1]);
+  EXPECT_EQ(mb2, (MBlock*)0);
+}
+
+TEST(MetaMap, Sync) {
+  ThreadState *thr = cur_thread();
+  MetaMap *m = &ctx->metamap;
+  u64 block[4] = {};  // fake malloc block
+  m->AllocBlock(thr, 0, (uptr)&block[0], 4 * sizeof(u64));
+  SyncVar *s1 = m->GetIfExistsAndLock((uptr)&block[0]);
+  EXPECT_EQ(s1, (SyncVar*)0);
+  s1 = m->GetOrCreateAndLock(thr, 0, (uptr)&block[0], true);
+  EXPECT_NE(s1, (SyncVar*)0);
+  EXPECT_EQ(s1->addr, (uptr)&block[0]);
+  s1->mtx.Unlock();
+  SyncVar *s2 = m->GetOrCreateAndLock(thr, 0, (uptr)&block[1], false);
+  EXPECT_NE(s2, (SyncVar*)0);
+  EXPECT_EQ(s2->addr, (uptr)&block[1]);
+  s2->mtx.ReadUnlock();
+  m->FreeBlock(thr, 0, (uptr)&block[0]);
+  s1 = m->GetIfExistsAndLock((uptr)&block[0]);
+  EXPECT_EQ(s1, (SyncVar*)0);
+  s2 = m->GetIfExistsAndLock((uptr)&block[1]);
+  EXPECT_EQ(s2, (SyncVar*)0);
+  m->OnThreadIdle(thr);
+}
+
+TEST(MetaMap, MoveMemory) {
+  ThreadState *thr = cur_thread();
+  MetaMap *m = &ctx->metamap;
+  u64 block1[4] = {};  // fake malloc block
+  u64 block2[4] = {};  // fake malloc block
+  m->AllocBlock(thr, 0, (uptr)&block1[0], 3 * sizeof(u64));
+  m->AllocBlock(thr, 0, (uptr)&block1[3], 1 * sizeof(u64));
+  SyncVar *s1 = m->GetOrCreateAndLock(thr, 0, (uptr)&block1[0], true);
+  s1->mtx.Unlock();
+  SyncVar *s2 = m->GetOrCreateAndLock(thr, 0, (uptr)&block1[1], true);
+  s2->mtx.Unlock();
+  m->MoveMemory((uptr)&block1[0], (uptr)&block2[0], 4 * sizeof(u64));
+  MBlock *mb1 = m->GetBlock((uptr)&block1[0]);
+  EXPECT_EQ(mb1, (MBlock*)0);
+  MBlock *mb2 = m->GetBlock((uptr)&block1[3]);
+  EXPECT_EQ(mb2, (MBlock*)0);
+  mb1 = m->GetBlock((uptr)&block2[0]);
+  EXPECT_NE(mb1, (MBlock*)0);
+  EXPECT_EQ(mb1->siz, 3 * sizeof(u64));
+  mb2 = m->GetBlock((uptr)&block2[3]);
+  EXPECT_NE(mb2, (MBlock*)0);
+  EXPECT_EQ(mb2->siz, 1 * sizeof(u64));
+  s1 = m->GetIfExistsAndLock((uptr)&block1[0]);
+  EXPECT_EQ(s1, (SyncVar*)0);
+  s2 = m->GetIfExistsAndLock((uptr)&block1[1]);
+  EXPECT_EQ(s2, (SyncVar*)0);
+  s1 = m->GetIfExistsAndLock((uptr)&block2[0]);
+  EXPECT_NE(s1, (SyncVar*)0);
+  EXPECT_EQ(s1->addr, (uptr)&block2[0]);
+  s1->mtx.Unlock();
+  s2 = m->GetIfExistsAndLock((uptr)&block2[1]);
+  EXPECT_NE(s2, (SyncVar*)0);
+  EXPECT_EQ(s2->addr, (uptr)&block2[1]);
+  s2->mtx.Unlock();
+  m->FreeRange(thr, 0, (uptr)&block2[0], 4 * sizeof(u64));
+}
+
+TEST(MetaMap, ResetSync) {
+  ThreadState *thr = cur_thread();
+  MetaMap *m = &ctx->metamap;
+  u64 block[1] = {};  // fake malloc block
+  m->AllocBlock(thr, 0, (uptr)&block[0], 1 * sizeof(u64));
+  SyncVar *s = m->GetOrCreateAndLock(thr, 0, (uptr)&block[0], true);
+  s->Reset();
+  s->mtx.Unlock();
+  uptr sz = m->FreeBlock(thr, 0, (uptr)&block[0]);
+  EXPECT_EQ(sz, 1 * sizeof(u64));
 }
 
 }  // namespace __tsan
diff --git a/lib/ubsan/ubsan_diag.cc b/lib/ubsan/ubsan_diag.cc
index 7a1de21..fb5cd4b 100644
--- a/lib/ubsan/ubsan_diag.cc
+++ b/lib/ubsan/ubsan_diag.cc
@@ -38,6 +38,17 @@
   initialized = true;
 }
 
+namespace {
+class Decorator : public SanitizerCommonDecorator {
+ public:
+  Decorator() : SanitizerCommonDecorator() {}
+  const char *Highlight() const { return Green(); }
+  const char *EndHighlight() const { return Default(); }
+  const char *Note() const { return Black(); }
+  const char *EndNote() const { return Default(); }
+};
+}
+
 Location __ubsan::getCallerLocation(uptr CallerLoc) {
   if (!CallerLoc)
     return Location();
@@ -49,7 +60,6 @@
 Location __ubsan::getFunctionLocation(uptr Loc, const char **FName) {
   if (!Loc)
     return Location();
-  // FIXME: We may need to run initialization earlier.
   InitializeSanitizerCommon();
 
   AddressInfo Info;
@@ -184,8 +194,7 @@
 }
 
 /// Render a snippet of the address space near a location.
-static void renderMemorySnippet(const __sanitizer::AnsiColorDecorator &Decor,
-                                MemoryLocation Loc,
+static void renderMemorySnippet(const Decorator &Decor, MemoryLocation Loc,
                                 Range *Ranges, unsigned NumRanges,
                                 const Diag::Arg *Args) {
   const unsigned BytesToShow = 32;
@@ -212,7 +221,7 @@
   Printf("\n");
 
   // Emit highlights.
-  Printf(Decor.Green());
+  Printf(Decor.Highlight());
   Range *InRange = upperBound(Min, Ranges, NumRanges);
   for (uptr P = Min; P != Max; ++P) {
     char Pad = ' ', Byte = ' ';
@@ -227,7 +236,7 @@
     char Buffer[] = { Pad, Pad, P == Loc ? '^' : Byte, Byte, 0 };
     Printf((P % 8 == 0) ? Buffer : &Buffer[1]);
   }
-  Printf("%s\n", Decor.Default());
+  Printf("%s\n", Decor.EndHighlight());
 
   // Go over the line again, and print names for the ranges.
   InRange = 0;
@@ -265,7 +274,8 @@
 }
 
 Diag::~Diag() {
-  __sanitizer::AnsiColorDecorator Decor(PrintsToTty());
+  InitializeSanitizerCommon();
+  Decorator Decor;
   SpinMutexLock l(&CommonSanitizerReportMutex);
   Printf(Decor.Bold());
 
@@ -274,11 +284,11 @@
   switch (Level) {
   case DL_Error:
     Printf("%s runtime error: %s%s",
-           Decor.Red(), Decor.Default(), Decor.Bold());
+           Decor.Warning(), Decor.EndWarning(), Decor.Bold());
     break;
 
   case DL_Note:
-    Printf("%s note: %s", Decor.Black(), Decor.Default());
+    Printf("%s note: %s", Decor.Note(), Decor.EndNote());
     break;
   }
 
diff --git a/make/platform/clang_linux.mk b/make/platform/clang_linux.mk
index 37708f8..2edbfff 100644
--- a/make/platform/clang_linux.mk
+++ b/make/platform/clang_linux.mk
@@ -129,8 +129,10 @@
 
 FUNCTIONS.builtins-i386 := $(CommonFunctions) $(ArchFunctions.i386)
 FUNCTIONS.builtins-x86_64 := $(CommonFunctions) $(ArchFunctions.x86_64)
-FUNCTIONS.profile-i386 := GCDAProfiling
-FUNCTIONS.profile-x86_64 := GCDAProfiling
+FUNCTIONS.profile-i386 := GCDAProfiling InstrProfiling InstrProfilingBuffer \
+                          InstrProfilingFile InstrProfilingPlatformOther \
+                          InstrProfilingRuntime
+FUNCTIONS.profile-x86_64 := $(FUNCTIONS.profile-i386)
 FUNCTIONS.san-i386 := $(SanitizerCommonFunctions)
 FUNCTIONS.san-x86_64 := $(SanitizerCommonFunctions)
 FUNCTIONS.asan-i386 := $(AsanFunctions) $(InterceptionFunctions) \
diff --git a/test/asan/CMakeLists.txt b/test/asan/CMakeLists.txt
index 2488ef1..cca7ccb 100644
--- a/test/asan/CMakeLists.txt
+++ b/test/asan/CMakeLists.txt
@@ -6,10 +6,12 @@
   # This is only true if we are cross-compiling.
   # Build all tests with host compiler and use host tools.
   set(ASAN_TEST_TARGET_CC ${CMAKE_C_COMPILER})
+  set(ASAN_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
   get_filename_component(ASAN_TEST_LLVM_TOOLS_DIR ${CMAKE_C_COMPILER} PATH)
   set(ASAN_TEST_CONFIG_SUFFIX "-arm-android")
   set(ASAN_TEST_BITS "32")
-  get_target_flags_for_arch(arm_android ASAN_TEST_TARGET_CFLAGS)
+  set(ASAN_TEST_DYNAMIC True)
+  set(ASAN_TEST_TARGET_ARCH "arm-android")
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/ARMAndroidConfig/lit.site.cfg
@@ -32,6 +34,21 @@
   list(APPEND ASAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/ARMLinuxConfig)
 endif()
 
+if(CAN_TARGET_aarch64)
+  # This is only true if we are cross-compiling.
+  # Build all tests with the test compiler (COMPILER_RT_TEST_COMPILER) and its flags.
+  set(ASAN_TEST_TARGET_CC ${COMPILER_RT_TEST_COMPILER})
+  set(ASAN_TEST_TARGET_CFLAGS ${COMPILER_RT_TEST_COMPILER_CFLAGS})
+  set(ASAN_TEST_CONFIG_SUFFIX "-aarch64-linux")
+  set(ASAN_TEST_BITS "64")
+  set(ASAN_TEST_DYNAMIC False)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/AArch64LinuxConfig/lit.site.cfg
+    )
+  list(APPEND ASAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/AArch64LinuxConfig)
+endif()
+
 if(CAN_TARGET_x86_64 OR CAN_TARGET_powerpc64)
   set(ASAN_TEST_CONFIG_SUFFIX "64")
   set(ASAN_TEST_BITS "64")
@@ -73,6 +90,26 @@
   endif()
 endif()
 
+if(CAN_TARGET_mips)
+  set(ASAN_TEST_CONFIG_SUFFIX "32")
+  set(ASAN_TEST_BITS "32")
+  set(ASAN_TEST_TARGET_CFLAGS ${TARGET_32_BIT_CFLAGS})
+  set(ASAN_TEST_DYNAMIC False)
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/32bitConfig/lit.site.cfg
+    )
+  list(APPEND ASAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/32bitConfig)
+  if(COMPILER_RT_BUILD_SHARED_ASAN)
+    set(ASAN_TEST_CONFIG_SUFFIX "32-Dynamic")
+    set(ASAN_TEST_DYNAMIC True)
+    configure_lit_site_cfg(
+      ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+      ${CMAKE_CURRENT_BINARY_DIR}/32bitConfig-dynamic/lit.site.cfg)
+    list(APPEND ASAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/32bitConfig-dynamic)
+  endif()
+endif()
+
 if(COMPILER_RT_INCLUDE_TESTS)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
@@ -80,7 +117,9 @@
 endif()
 
 set(ASAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
-if(NOT COMPILER_RT_STANDALONE_BUILD)
+if(COMPILER_RT_STANDALONE_BUILD)
+  list(APPEND ASAN_TEST_DEPS ${LLVM_TOOLS_BINARY_DIR}/FileCheck)
+else()
   list(APPEND ASAN_TEST_DEPS asan)
 endif()
 
diff --git a/test/asan/TestCases/Android/coverage-android.cc b/test/asan/TestCases/Android/coverage-android.cc
new file mode 100644
index 0000000..071a2e3
--- /dev/null
+++ b/test/asan/TestCases/Android/coverage-android.cc
@@ -0,0 +1,67 @@
+// Test for direct coverage writing with dlopen.
+
+// Test normal exit.
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 -DSHARED %s -shared -o %T/libcoverage_android_test_1.so -fPIC
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 -DSO_DIR=\"%device\" %s -o %t
+
+// RUN: adb shell rm -rf %device/coverage-android
+// RUN: rm -rf %T/coverage-android
+
+// RUN: adb shell mkdir -p %device/coverage-android/direct
+// RUN: mkdir -p %T/coverage-android/direct
+// RUN: ASAN_OPTIONS=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android/direct:verbosity=1 %run %t
+// RUN: adb pull %device/coverage-android/direct %T/coverage-android/direct
+// RUN: ls; pwd
+// RUN: cd %T/coverage-android/direct
+// RUN: %sancov rawunpack *.sancov.raw
+// RUN: %sancov print *.sancov |& FileCheck %s
+
+
+// Test sudden death.
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 -DSHARED -DKILL %s -shared -o %T/libcoverage_android_test_1.so -fPIC
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 -DSO_DIR=\"%device\" %s -o %t
+
+// RUN: adb shell rm -rf %device/coverage-android-kill
+// RUN: rm -rf %T/coverage-android-kill
+
+// RUN: adb shell mkdir -p %device/coverage-android-kill/direct
+// RUN: mkdir -p %T/coverage-android-kill/direct
+// RUN: ASAN_OPTIONS=coverage=1:coverage_direct=1:coverage_dir=%device/coverage-android-kill/direct:verbosity=1 not %run %t
+// RUN: adb pull %device/coverage-android-kill/direct %T/coverage-android-kill/direct
+// RUN: ls; pwd
+// RUN: cd %T/coverage-android-kill/direct
+// RUN: %sancov rawunpack *.sancov.raw
+// RUN: %sancov print *.sancov |& FileCheck %s
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <signal.h>
+
+#ifdef SHARED
+extern "C" {
+void bar() {
+  printf("bar\n");
+#ifdef KILL
+  kill(getpid(), SIGKILL);
+#endif
+}
+}
+#else
+
+int main(int argc, char **argv) {
+  fprintf(stderr, "PID: %d\n", getpid());
+  void *handle1 =
+      dlopen(SO_DIR "/libcoverage_android_test_1.so", RTLD_LAZY);
+  assert(handle1);
+  void (*bar1)() = (void (*)())dlsym(handle1, "bar");
+  assert(bar1);
+  bar1();
+
+  return 0;
+}
+#endif
+
+// CHECK: 2 PCs total
diff --git a/test/asan/TestCases/Android/lit.local.cfg b/test/asan/TestCases/Android/lit.local.cfg
new file mode 100644
index 0000000..42513dd
--- /dev/null
+++ b/test/asan/TestCases/Android/lit.local.cfg
@@ -0,0 +1,11 @@
+def getRoot(config):
+  if not config.parent:
+    return config
+  return getRoot(config.parent)
+
+root = getRoot(config)
+
+if root.android != "TRUE":
+  config.unsupported = True
+
+config.substitutions.append( ("%device", "/data/local/tmp/Output") )
diff --git a/test/asan/TestCases/Darwin/interface_symbols_darwin.c b/test/asan/TestCases/Darwin/interface_symbols_darwin.c
index ea3e796..e513954 100644
--- a/test/asan/TestCases/Darwin/interface_symbols_darwin.c
+++ b/test/asan/TestCases/Darwin/interface_symbols_darwin.c
@@ -16,6 +16,7 @@
 // RUN:   | grep -v "__asan_on_error" > %t.symbols
 
 // RUN: cat %p/../../../../lib/asan/asan_interface_internal.h \
+// RUN:     %p/../../../../lib/asan/asan_init_version.h \
 // RUN:    | sed "s/\/\/.*//" | sed "s/typedef.*//" \
 // RUN:    | grep -v "OPTIONAL" \
 // RUN:    | grep "__asan_.*(" | sed "s/.* __asan_/__asan_/;s/(.*//" \
diff --git a/test/asan/TestCases/Linux/asan_dlopen_test.cc b/test/asan/TestCases/Linux/asan_dlopen_test.cc
index 663675c..f1e31b0 100644
--- a/test/asan/TestCases/Linux/asan_dlopen_test.cc
+++ b/test/asan/TestCases/Linux/asan_dlopen_test.cc
@@ -3,6 +3,7 @@
 // RUN: %clangxx %s -DRT=\"%shared_libasan\" -o %t -ldl
 // RUN: not %run %t 2>&1 | FileCheck %s
 // REQUIRES: asan-dynamic-runtime
+// XFAIL: android
 
 #include <dlfcn.h>
 
diff --git a/test/asan/TestCases/Linux/asan_preload_test-1.cc b/test/asan/TestCases/Linux/asan_preload_test-1.cc
index bc2da3e..ba96872 100644
--- a/test/asan/TestCases/Linux/asan_preload_test-1.cc
+++ b/test/asan/TestCases/Linux/asan_preload_test-1.cc
@@ -8,6 +8,7 @@
 // RUN: LD_PRELOAD=%shared_libasan not %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: asan-dynamic-runtime
+// XFAIL: android
 
 #if BUILD_SO
 char dummy;
diff --git a/test/asan/TestCases/Linux/asan_preload_test-2.cc b/test/asan/TestCases/Linux/asan_preload_test-2.cc
index c3d7042..0741958 100644
--- a/test/asan/TestCases/Linux/asan_preload_test-2.cc
+++ b/test/asan/TestCases/Linux/asan_preload_test-2.cc
@@ -4,6 +4,7 @@
 // RUN: LD_PRELOAD=%shared_libasan not %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: asan-dynamic-runtime
+// XFAIL: android
 
 #include <stdlib.h>
 
diff --git a/test/asan/TestCases/Linux/asan_rt_confict_test-1.cc b/test/asan/TestCases/Linux/asan_rt_confict_test-1.cc
index 5687f17..30f1c17 100644
--- a/test/asan/TestCases/Linux/asan_rt_confict_test-1.cc
+++ b/test/asan/TestCases/Linux/asan_rt_confict_test-1.cc
@@ -5,6 +5,7 @@
 // RUN: LD_PRELOAD=%shared_libasan not %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: asan-dynamic-runtime
+// XFAIL: android
 
 #include <stdlib.h>
 int main(int argc, char **argv) { return 0; }
diff --git a/test/asan/TestCases/Linux/asan_rt_confict_test-2.cc b/test/asan/TestCases/Linux/asan_rt_confict_test-2.cc
index 9ae8468..4c935e2 100644
--- a/test/asan/TestCases/Linux/asan_rt_confict_test-2.cc
+++ b/test/asan/TestCases/Linux/asan_rt_confict_test-2.cc
@@ -6,6 +6,7 @@
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 // REQUIRES: asan-dynamic-runtime
+// XFAIL: android
 
 #if BUILD_SO
 char dummy;
diff --git a/test/asan/TestCases/Linux/clone_test.cc b/test/asan/TestCases/Linux/clone_test.cc
index bc6ef08..e9c1f16 100644
--- a/test/asan/TestCases/Linux/clone_test.cc
+++ b/test/asan/TestCases/Linux/clone_test.cc
@@ -5,6 +5,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && %run %t | FileCheck %s
 // RUN: %clangxx_asan -O2 %s -o %t && %run %t | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && %run %t | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <stdio.h>
 #include <sched.h>
diff --git a/test/asan/TestCases/Linux/coverage-direct.cc b/test/asan/TestCases/Linux/coverage-direct.cc
index f6e7801..7fe2514 100644
--- a/test/asan/TestCases/Linux/coverage-direct.cc
+++ b/test/asan/TestCases/Linux/coverage-direct.cc
@@ -4,13 +4,13 @@
 
 // RUN: rm -rf %T/coverage-direct
 
-// RUN: mkdir -p %T/coverage-direct/normal && cd %T/coverage-direct/normal
-// RUN: ASAN_OPTIONS=coverage=1:coverage_direct=0:verbosity=1 %run %t
-// RUN: %sancov print *.sancov >out.txt
-// RUN: cd ../..
+// RUN: mkdir -p %T/coverage-direct/normal
+// RUN: ASAN_OPTIONS=coverage=1:coverage_direct=0:coverage_dir=%T/coverage-direct/normal:verbosity=1 %run %t
+// RUN: %sancov print %T/coverage-direct/normal/*.sancov >%T/coverage-direct/normal/out.txt
 
-// RUN: mkdir -p %T/coverage-direct/direct && cd %T/coverage-direct/direct
-// RUN: ASAN_OPTIONS=coverage=1:coverage_direct=1:verbosity=1 %run %t
+// RUN: mkdir -p %T/coverage-direct/direct
+// RUN: ASAN_OPTIONS=coverage=1:coverage_direct=1:coverage_dir=%T/coverage-direct/direct:verbosity=1 %run %t
+// RUN: cd %T/coverage-direct/direct
 // RUN: %sancov rawunpack *.sancov.raw
 // RUN: %sancov print *.sancov >out.txt
 // RUN: cd ../..
diff --git a/test/asan/TestCases/Linux/coverage-disabled.cc b/test/asan/TestCases/Linux/coverage-disabled.cc
new file mode 100644
index 0000000..315c312
--- /dev/null
+++ b/test/asan/TestCases/Linux/coverage-disabled.cc
@@ -0,0 +1,18 @@
+// Test that no data is collected without a runtime flag.
+//
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 %s -o %t
+//
+// RUN: rm -rf %T/coverage-disabled
+//
+// RUN: mkdir -p %T/coverage-disabled/normal
+// RUN: ASAN_OPTIONS=coverage_direct=0:coverage_dir=%T/coverage-disabled/normal:verbosity=1 %run %t
+// RUN: not %sancov print %T/coverage-disabled/normal/*.sancov 2>&1
+//
+// RUN: mkdir -p %T/coverage-disabled/direct
+// RUN: ASAN_OPTIONS=coverage_direct=1:coverage_dir=%T/coverage-disabled/direct:verbosity=1 %run %t
+// RUN: cd %T/coverage-disabled/direct
+// RUN: not %sancov rawunpack *.sancov
+
+int main(int argc, char **argv) {
+  return 0;
+}
diff --git a/test/asan/TestCases/Linux/coverage-fork-direct.cc b/test/asan/TestCases/Linux/coverage-fork-direct.cc
new file mode 100644
index 0000000..7489b72
--- /dev/null
+++ b/test/asan/TestCases/Linux/coverage-fork-direct.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 %s -o %t
+// RUN: rm -rf %T/coverage-fork-direct
+// RUN: mkdir -p %T/coverage-fork-direct && cd %T/coverage-fork-direct
+// RUN: (ASAN_OPTIONS=coverage=1:coverage_direct=1:verbosity=1 %run %t; \
+// RUN:  %sancov rawunpack *.sancov.raw; %sancov print *.sancov) 2>&1
+//
+// XFAIL: android
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+__attribute__((noinline))
+void foo() { printf("foo\n"); }
+
+__attribute__((noinline))
+void bar() { printf("bar\n"); }
+
+__attribute__((noinline))
+void baz() { printf("baz\n"); }
+
+int main(int argc, char **argv) {
+  pid_t child_pid = fork();
+  if (child_pid == 0) {
+    fprintf(stderr, "Child PID: %d\n", getpid());
+    baz();
+  } else {
+    fprintf(stderr, "Parent PID: %d\n", getpid());
+    foo();
+    bar();
+  }
+  return 0;
+}
+
+// CHECK-DAG: Child PID: [[ChildPID:[0-9]+]]
+// CHECK-DAG: Parent PID: [[ParentPID:[0-9]+]]
+// CHECK-DAG: read 3 PCs from {{.*}}.[[ParentPID]].sancov
+// CHECK-DAG: read 1 PCs from {{.*}}.[[ChildPID]].sancov
diff --git a/test/asan/TestCases/Linux/coverage-fork.cc b/test/asan/TestCases/Linux/coverage-fork.cc
new file mode 100644
index 0000000..28b2a49
--- /dev/null
+++ b/test/asan/TestCases/Linux/coverage-fork.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx_asan -mllvm -asan-coverage=1 %s -o %t
+// RUN: export ASAN_OPTIONS=coverage=1:coverage_direct=0:verbosity=1
+// RUN: rm -rf %T/coverage-fork
+// RUN: mkdir -p %T/coverage-fork && cd %T/coverage-fork
+// RUN: %run %t 2>&1 | FileCheck %s
+//
+// XFAIL: android
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+__attribute__((noinline))
+void foo() { printf("foo\n"); }
+
+__attribute__((noinline))
+void bar() { printf("bar\n"); }
+
+__attribute__((noinline))
+void baz() { printf("baz\n"); }
+
+int main(int argc, char **argv) {
+  pid_t child_pid = fork();
+  if (child_pid == 0) {
+    fprintf(stderr, "Child PID: %d\n", getpid());
+    baz();
+  } else {
+    fprintf(stderr, "Parent PID: %d\n", getpid());
+    foo();
+    bar();
+  }
+  return 0;
+}
+
+// CHECK-DAG: Child PID: [[ChildPID:[0-9]+]]
+// CHECK-DAG: [[ChildPID]].sancov: 1 PCs written
+// CHECK-DAG: Parent PID: [[ParentPID:[0-9]+]]
+// CHECK-DAG: [[ParentPID]].sancov: 3 PCs written
diff --git a/test/asan/TestCases/Linux/coverage-maybe-open-file.cc b/test/asan/TestCases/Linux/coverage-maybe-open-file.cc
index 4665123..1cd2253 100644
--- a/test/asan/TestCases/Linux/coverage-maybe-open-file.cc
+++ b/test/asan/TestCases/Linux/coverage-maybe-open-file.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -mllvm -asan-coverage=1 %s -o %t
 // RUN: rm -rf %T/coverage-maybe-open-file
 // RUN: mkdir -p %T/coverage-maybe-open-file && cd %T/coverage-maybe-open-file
diff --git a/test/asan/TestCases/Linux/glob.cc b/test/asan/TestCases/Linux/glob.cc
index c1dbeda..e0eeb33 100644
--- a/test/asan/TestCases/Linux/glob.cc
+++ b/test/asan/TestCases/Linux/glob.cc
@@ -1,5 +1,9 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -O0 %s -o %t && %run %t %p 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && %run %t %p 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <assert.h>
 #include <glob.h>
diff --git a/test/asan/TestCases/Linux/heavy_uar_test.cc b/test/asan/TestCases/Linux/heavy_uar_test.cc
index cedd69d..bfea520 100644
--- a/test/asan/TestCases/Linux/heavy_uar_test.cc
+++ b/test/asan/TestCases/Linux/heavy_uar_test.cc
@@ -3,6 +3,7 @@
 // RUN:   not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O2 %s -o %t && \
 // RUN:   not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 // FIXME: Fix this test under GCC.
 // REQUIRES: Clang
diff --git a/test/asan/TestCases/Linux/interception_readdir_r_test.cc b/test/asan/TestCases/Linux/interception_readdir_r_test.cc
index 347320a..93b553c 100644
--- a/test/asan/TestCases/Linux/interception_readdir_r_test.cc
+++ b/test/asan/TestCases/Linux/interception_readdir_r_test.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -O0 %s -DTEMP_DIR='"'"%T"'"' -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O1 %s -DTEMP_DIR='"'"%T"'"' -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O2 %s -DTEMP_DIR='"'"%T"'"' -o %t && %run %t 2>&1 | FileCheck %s
diff --git a/test/asan/TestCases/Linux/interface_symbols_linux.c b/test/asan/TestCases/Linux/interface_symbols_linux.c
index d496d95..72416f1 100644
--- a/test/asan/TestCases/Linux/interface_symbols_linux.c
+++ b/test/asan/TestCases/Linux/interface_symbols_linux.c
@@ -9,6 +9,7 @@
 // RUN:    | grep -v "__asan_stack_" \
 // RUN:    | grep -v "__asan_on_error" > %t.symbols
 // RUN: cat %p/../../../../lib/asan/asan_interface_internal.h \
+// RUN:     %p/../../../../lib/asan/asan_init_version.h \
 // RUN:    | sed "s/\/\/.*//" | sed "s/typedef.*//" \
 // RUN:    | grep -v "OPTIONAL" \
 // RUN:    | grep "__asan_.*(" | sed "s/.* __asan_/__asan_/;s/(.*//" \
diff --git a/test/asan/TestCases/Linux/kernel-area.cc b/test/asan/TestCases/Linux/kernel-area.cc
index 4b330f2..8dd509f 100644
--- a/test/asan/TestCases/Linux/kernel-area.cc
+++ b/test/asan/TestCases/Linux/kernel-area.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Test that kernel area is not sanitized on 32-bit machines.
 //
 // RUN: %clangxx_asan %s -o %t
diff --git a/test/asan/TestCases/Linux/malloc_delete_mismatch.cc b/test/asan/TestCases/Linux/malloc_delete_mismatch.cc
index c9f61d8..085eb15 100644
--- a/test/asan/TestCases/Linux/malloc_delete_mismatch.cc
+++ b/test/asan/TestCases/Linux/malloc_delete_mismatch.cc
@@ -12,6 +12,7 @@
 // Also works if no malloc context is available.
 // RUN: ASAN_OPTIONS=alloc_dealloc_mismatch=1:malloc_context_size=0:fast_unwind_on_malloc=0 not %run %t 2>&1 | FileCheck %s
 // RUN: ASAN_OPTIONS=alloc_dealloc_mismatch=1:malloc_context_size=0:fast_unwind_on_malloc=1 not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 #include <stdlib.h>
 
 static volatile char *x;
diff --git a/test/asan/TestCases/Linux/odr-violation.cc b/test/asan/TestCases/Linux/odr-violation.cc
index 91fa6d5..48e0907 100644
--- a/test/asan/TestCases/Linux/odr-violation.cc
+++ b/test/asan/TestCases/Linux/odr-violation.cc
@@ -1,15 +1,19 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Different size: detect a bug if detect_odr_violation>=1
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so
-// RUN: %clangxx_asan %s %t.so -Wl,-R. -o %t
-// RUN: ASAN_OPTIONS=detect_odr_violation=1 not %run %t 2>&1 | FileCheck %s
-// RUN: ASAN_OPTIONS=detect_odr_violation=2 not %run %t 2>&1 | FileCheck %s
-// RUN: ASAN_OPTIONS=detect_odr_violation=0     %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
-// RUN:                                         %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t-ODR-SO.so
+// RUN: %clangxx_asan %s %t-ODR-SO.so -Wl,-R. -o %t-ODR-EXE
+// RUN: ASAN_OPTIONS=detect_odr_violation=1 not %run %t-ODR-EXE 2>&1 | FileCheck %s
+// RUN: ASAN_OPTIONS=detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s
+// RUN: ASAN_OPTIONS=detect_odr_violation=0     %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN:                                     not %run %t-ODR-EXE 2>&1 | FileCheck %s
 //
 // Same size: report a bug only if detect_odr_violation>=2.
-// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -DSZ=100
-// RUN: ASAN_OPTIONS=detect_odr_violation=1     %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
-// RUN: ASAN_OPTIONS=detect_odr_violation=2 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t-ODR-SO.so -DSZ=100
+// RUN: ASAN_OPTIONS=detect_odr_violation=1     %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: ASAN_OPTIONS=detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s
+// RUN:                                     not %run %t-ODR-EXE 2>&1 | FileCheck %s
 
 // GNU driver doesn't handle .so files properly.
 // REQUIRES: Clang
@@ -30,4 +34,8 @@
 
 // CHECK: ERROR: AddressSanitizer: odr-violation
 // CHECK: size=100 G
+// CHECK: size={{4|100}} G
+// CHECK: These globals were registered at these points:
+// CHECK: ODR-EXE
+// CHECK: ODR-SO
 // DISABLED: PASS
diff --git a/test/asan/TestCases/Linux/preinit_test.cc b/test/asan/TestCases/Linux/preinit_test.cc
index ab62d63..10dde67 100644
--- a/test/asan/TestCases/Linux/preinit_test.cc
+++ b/test/asan/TestCases/Linux/preinit_test.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx      -DFUNC=zzzz %s -shared -o %t.so -fPIC
 // RUN: %clangxx_asan -DFUNC=main %s         -o %t    -Wl,-R. %t.so
 // RUN: %run %t
diff --git a/test/asan/TestCases/Linux/ptrace.cc b/test/asan/TestCases/Linux/ptrace.cc
index 90086a0..45febd3 100644
--- a/test/asan/TestCases/Linux/ptrace.cc
+++ b/test/asan/TestCases/Linux/ptrace.cc
@@ -1,5 +1,9 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -O0 %s -o %t && %run %t
 // RUN: %clangxx_asan -DPOSITIVE -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/Linux/shmctl.cc b/test/asan/TestCases/Linux/shmctl.cc
index 5fc9b46..e1752bc 100644
--- a/test/asan/TestCases/Linux/shmctl.cc
+++ b/test/asan/TestCases/Linux/shmctl.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -O1 %s -o %t && %run %t 2>&1
 // Regression test for
 // https://code.google.com/p/address-sanitizer/issues/detail?id=250
diff --git a/test/asan/TestCases/Linux/stack-trace-dlclose.cc b/test/asan/TestCases/Linux/stack-trace-dlclose.cc
index 3c6812b..44c8a03 100644
--- a/test/asan/TestCases/Linux/stack-trace-dlclose.cc
+++ b/test/asan/TestCases/Linux/stack-trace-dlclose.cc
@@ -1,6 +1,10 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -DSHARED %s -shared -o %T/stack_trace_dlclose.so -fPIC
 // RUN: %clangxx_asan -DSO_DIR=\"%T\" %s -o %t
 // RUN: ASAN_OPTIONS=exitcode=0 %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <assert.h>
 #include <dlfcn.h>
diff --git a/test/asan/TestCases/Linux/stress_dtls.c b/test/asan/TestCases/Linux/stress_dtls.c
index 814adf3..cb901ee 100644
--- a/test/asan/TestCases/Linux/stress_dtls.c
+++ b/test/asan/TestCases/Linux/stress_dtls.c
@@ -9,7 +9,7 @@
 // RUN: %clangxx_asan -x c -DSO_NAME=f0 %s -shared -o %t-f0.so -fPIC
 // RUN: %clangxx_asan -x c -DSO_NAME=f1 %s -shared -o %t-f1.so -fPIC
 // RUN: %clangxx_asan -x c -DSO_NAME=f2 %s -shared -o %t-f2.so -fPIC
-// RUN: %clangxx_asan %s -ldl -lpthread -o %t
+// RUN: %clangxx_asan %s -ldl -pthread -o %t
 // RUN: %run %t 0 3
 // RUN: %run %t 2 3
 // RUN: ASAN_OPTIONS=verbosity=2 %run %t 10 2 2>&1 | FileCheck %s
@@ -31,7 +31,7 @@
 /*
 cc=your-compiler
 
-$cc stress_dtls.c -lpthread -ldl
+$cc stress_dtls.c -pthread -ldl
 for((i=0;i<100;i++)); do
   $cc -fPIC -shared -DSO_NAME=f$i -o a.out-f$i.so stress_dtls.c;
 done
diff --git a/test/asan/TestCases/Linux/syscalls.cc b/test/asan/TestCases/Linux/syscalls.cc
index ec14bca..bcdd5bc 100644
--- a/test/asan/TestCases/Linux/syscalls.cc
+++ b/test/asan/TestCases/Linux/syscalls.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
diff --git a/test/asan/TestCases/Linux/tsd_dtor_leak.cc b/test/asan/TestCases/Linux/tsd_dtor_leak.cc
index fdac964..32253af 100644
--- a/test/asan/TestCases/Linux/tsd_dtor_leak.cc
+++ b/test/asan/TestCases/Linux/tsd_dtor_leak.cc
@@ -1,13 +1,13 @@
 // Regression test for a leak in tsd:
 // https://code.google.com/p/address-sanitizer/issues/detail?id=233
-// RUN: %clangxx_asan -O1 %s -lpthread -o %t
+// RUN: %clangxx_asan -O1 %s -pthread -o %t
 // RUN: ASAN_OPTIONS=quarantine_size=1 %run %t
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <sanitizer/allocator_interface.h>
 
-extern "C" size_t __asan_get_heap_size();
 static pthread_key_t tsd_key;
 
 void *Thread(void *) {
@@ -30,7 +30,7 @@
     pthread_t t;
     pthread_create(&t, 0, Thread, 0);
     pthread_join(t, 0);
-    size_t new_heap_size = __asan_get_heap_size();
+    size_t new_heap_size = __sanitizer_get_heap_size();
     fprintf(stderr, "heap size: new: %zd old: %zd\n", new_heap_size, old_heap_size);
     if (old_heap_size)
       assert(old_heap_size == new_heap_size);
diff --git a/test/asan/TestCases/Linux/uar_signals.cc b/test/asan/TestCases/Linux/uar_signals.cc
index 3c50fcf..f42c3f6 100644
--- a/test/asan/TestCases/Linux/uar_signals.cc
+++ b/test/asan/TestCases/Linux/uar_signals.cc
@@ -1,6 +1,7 @@
 // This test checks that the implementation of use-after-return
 // is async-signal-safe.
-// RUN: %clangxx_asan -O1 %s -o %t -lpthread && %run %t
+// RUN: %clangxx_asan -O1 %s -o %t -pthread && %run %t
+// REQUIRES: stable-runtime
 #include <signal.h>
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/Linux/unpoison_tls.cc b/test/asan/TestCases/Linux/unpoison_tls.cc
index 4c693b4..9c1d74b 100644
--- a/test/asan/TestCases/Linux/unpoison_tls.cc
+++ b/test/asan/TestCases/Linux/unpoison_tls.cc
@@ -1,7 +1,7 @@
 // Test that TLS is unpoisoned on thread death.
 // REQUIRES: x86_64-supported-target,i386-supported-target
 
-// RUN: %clangxx_asan -O1 %s -lpthread -o %t && %run %t 2>&1
+// RUN: %clangxx_asan -O1 %s -pthread -o %t && %run %t 2>&1
 
 #include <assert.h>
 #include <pthread.h>
diff --git a/test/asan/TestCases/asan-symbolize-sanity-test.cc b/test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc
similarity index 92%
rename from test/asan/TestCases/asan-symbolize-sanity-test.cc
rename to test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc
index 1aa41b6..84dc1c2 100644
--- a/test/asan/TestCases/asan-symbolize-sanity-test.cc
+++ b/test/asan/TestCases/Posix/asan-symbolize-sanity-test.cc
@@ -1,9 +1,13 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Check that asan_symbolize.py script works (for binaries, ASan RTL and
 // shared object files.
 
 // RUN: %clangxx_asan -O0 -DSHARED_LIB %s -fPIC -shared -o %t-so.so
 // RUN: %clangxx_asan -O0 %s -ldl -o %t
 // RUN: env ASAN_OPTIONS=symbolize=0 not %run %t 2>&1 | %asan_symbolize | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #if !defined(SHARED_LIB)
 #include <dlfcn.h>
diff --git a/test/asan/TestCases/Posix/init-order-dlopen.cc b/test/asan/TestCases/Posix/init-order-dlopen.cc
index f7b0d6a..2b86ace 100644
--- a/test/asan/TestCases/Posix/init-order-dlopen.cc
+++ b/test/asan/TestCases/Posix/init-order-dlopen.cc
@@ -10,8 +10,8 @@
 // If the linker doesn't support --export-dynamic (which is ELF-specific),
 // try to link without that option.
 // FIXME: find a better solution.
-// RUN: %clangxx_asan -O0 %s -lpthread -ldl -o %t -Wl,--export-dynamic || \
-// RUN:     %clangxx_asan -O0 %s -lpthread -ldl -o %t
+// RUN: %clangxx_asan -O0 %s -pthread -ldl -o %t -Wl,--export-dynamic || \
+// RUN:     %clangxx_asan -O0 %s -pthread -ldl -o %t
 // RUN: ASAN_OPTIONS=strict_init_order=true %run %t 2>&1 | FileCheck %s
 #if !defined(SHARED_LIB)
 #include <dlfcn.h>
diff --git a/test/asan/TestCases/Posix/shared-lib-test.cc b/test/asan/TestCases/Posix/shared-lib-test.cc
index ccaeed6..21f26b3 100644
--- a/test/asan/TestCases/Posix/shared-lib-test.cc
+++ b/test/asan/TestCases/Posix/shared-lib-test.cc
@@ -6,6 +6,7 @@
 // RUN: %clangxx_asan -O2 %s -ldl -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: %clangxx_asan -O3 -DSHARED_LIB %s -fPIC -shared -o %t-so.so
 // RUN: %clangxx_asan -O3 %s -ldl -o %t && not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #if !defined(SHARED_LIB)
 #include <dlfcn.h>
diff --git a/test/asan/TestCases/Posix/start-deactivated.cc b/test/asan/TestCases/Posix/start-deactivated.cc
index ac45c16..43023fb 100644
--- a/test/asan/TestCases/Posix/start-deactivated.cc
+++ b/test/asan/TestCases/Posix/start-deactivated.cc
@@ -6,6 +6,7 @@
 // RUN: %clangxx -O0 %s -c -o %t.o
 // RUN: %clangxx_asan -O0 %t.o -ldl -o %t
 // RUN: ASAN_OPTIONS=start_deactivated=1 not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #if !defined(SHARED_LIB)
 #include <dlfcn.h>
diff --git a/test/asan/TestCases/Windows/aligned_mallocs.cc b/test/asan/TestCases/Windows/aligned_mallocs.cc
index bbbc423..df740b6 100644
--- a/test/asan/TestCases/Windows/aligned_mallocs.cc
+++ b/test/asan/TestCases/Windows/aligned_mallocs.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/allocators_sanity.cc b/test/asan/TestCases/Windows/allocators_sanity.cc
index 55cfdff..66a862d 100644
--- a/test/asan/TestCases/Windows/allocators_sanity.cc
+++ b/test/asan/TestCases/Windows/allocators_sanity.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/beginthreadex.cc b/test/asan/TestCases/Windows/beginthreadex.cc
index 12e2b1f..f2b2b45 100644
--- a/test/asan/TestCases/Windows/beginthreadex.cc
+++ b/test/asan/TestCases/Windows/beginthreadex.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/bitfield.cc b/test/asan/TestCases/Windows/bitfield.cc
index e1a3fc8..253a759 100644
--- a/test/asan/TestCases/Windows/bitfield.cc
+++ b/test/asan/TestCases/Windows/bitfield.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/bitfield_uaf.cc b/test/asan/TestCases/Windows/bitfield_uaf.cc
index 63f3941..f49d671 100644
--- a/test/asan/TestCases/Windows/bitfield_uaf.cc
+++ b/test/asan/TestCases/Windows/bitfield_uaf.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/calloc_left_oob.cc b/test/asan/TestCases/Windows/calloc_left_oob.cc
index 71399a7..459025b 100644
--- a/test/asan/TestCases/Windows/calloc_left_oob.cc
+++ b/test/asan/TestCases/Windows/calloc_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/calloc_right_oob.cc b/test/asan/TestCases/Windows/calloc_right_oob.cc
index fcf3138..c976b87 100644
--- a/test/asan/TestCases/Windows/calloc_right_oob.cc
+++ b/test/asan/TestCases/Windows/calloc_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/calloc_uaf.cc b/test/asan/TestCases/Windows/calloc_uaf.cc
index 5a1d936..db5e707 100644
--- a/test/asan/TestCases/Windows/calloc_uaf.cc
+++ b/test/asan/TestCases/Windows/calloc_uaf.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/crt_initializers.cc b/test/asan/TestCases/Windows/crt_initializers.cc
index 5544912..084f8a4 100644
--- a/test/asan/TestCases/Windows/crt_initializers.cc
+++ b/test/asan/TestCases/Windows/crt_initializers.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t | FileCheck %s
 
 // This is a test for http://code.google.com/p/address-sanitizer/issues/detail?id=305
diff --git a/test/asan/TestCases/Windows/dll_aligned_mallocs.cc b/test/asan/TestCases/Windows/dll_aligned_mallocs.cc
index db2b9d1..8b2c4d6 100644
--- a/test/asan/TestCases/Windows/dll_aligned_mallocs.cc
+++ b/test/asan/TestCases/Windows/dll_aligned_mallocs.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: %run %t %t.dll | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/dll_allocators_sanity.cc b/test/asan/TestCases/Windows/dll_allocators_sanity.cc
index 2f3f78f..1d31f37 100644
--- a/test/asan/TestCases/Windows/dll_allocators_sanity.cc
+++ b/test/asan/TestCases/Windows/dll_allocators_sanity.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: %run %t %t.dll | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/dll_cerr.cc b/test/asan/TestCases/Windows/dll_cerr.cc
new file mode 100644
index 0000000..8f1a699
--- /dev/null
+++ b/test/asan/TestCases/Windows/dll_cerr.cc
@@ -0,0 +1,23 @@
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: %run %t %t.dll 2>&1 | FileCheck %s
+
+// Test that it works correctly even with ICF enabled.
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll -link /OPT:REF /OPT:ICF
+// RUN: %run %t %t.dll 2>&1 | FileCheck %s
+
+#include <iostream>
+
+extern "C" __declspec(dllexport)
+int test_function() {
+  // Just make sure we can use cout.
+  std::cout << "All ok\n";
+// CHECK: All ok
+
+  // This line forces a declaration of some global basic_ostream internal object that
+  // calls memcpy() in its constructor.  This doesn't work if __asan_init is not
+  // called early enough.
+  std::cout << 42;
+// CHECK: 42
+  return 0;
+}
diff --git a/test/asan/TestCases/Windows/dll_host.cc b/test/asan/TestCases/Windows/dll_host.cc
index 470d3fa..5eb710e 100644
--- a/test/asan/TestCases/Windows/dll_host.cc
+++ b/test/asan/TestCases/Windows/dll_host.cc
@@ -2,7 +2,7 @@
 //
 // Just make sure we can compile this.
 // The actual compile&run sequence is to be done by the DLL tests.
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 //
 // Get the list of ASan wrappers exported by the main module RTL:
 // RUN: dumpbin /EXPORTS %t | grep -o "__asan_wrap[^ ]*" | grep -v @ | sort | uniq > %t.exported_wrappers
diff --git a/test/asan/TestCases/Windows/dll_intercept_memchr.cc b/test/asan/TestCases/Windows/dll_intercept_memchr.cc
new file mode 100644
index 0000000..1435bdc
--- /dev/null
+++ b/test/asan/TestCases/Windows/dll_intercept_memchr.cc
@@ -0,0 +1,21 @@
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: not %run %t %t.dll 2>&1 | FileCheck %s
+
+#include <string.h>
+
+extern "C" __declspec(dllexport)
+int test_function() {
+  char buff[6] = "Hello";
+
+  memchr(buff, 'z', 7);
+// CHECK: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+// CHECK: READ of size 7 at [[ADDR]] thread T0
+// CHECK-NEXT:  __asan_wrap_memchr
+// CHECK-NEXT:  memchr
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memchr.cc:[[@LINE-5]]
+// CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memchr.cc
+// CHECK: 'buff' <== Memory access at offset {{.*}} overflows this variable
+  return 0;
+}
diff --git a/test/asan/TestCases/Windows/dll_intercept_memcpy.cc b/test/asan/TestCases/Windows/dll_intercept_memcpy.cc
index 8a69e7d..7ad0d82 100644
--- a/test/asan/TestCases/Windows/dll_intercept_memcpy.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_memcpy.cc
@@ -1,33 +1,32 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: not %run %t %t.dll 2>&1 | FileCheck %s
+
+// Test that it works correctly even with ICF enabled.
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll -link /OPT:REF /OPT:ICF
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <stdio.h>
 #include <string.h>
 
-void call_memcpy(void* (*f)(void *, const void *, size_t),
-                 void *a, const void *b, size_t c) {
-  f(a, b, c);
-}
-
 extern "C" __declspec(dllexport)
 int test_function() {
   char buff1[6] = "Hello", buff2[5];
 
-  call_memcpy(&memcpy, buff2, buff1, 5);
+  memcpy(buff2, buff1, 5);
   if (buff1[2] != buff2[2])
     return 2;
   printf("Initial test OK\n");
   fflush(0);
 // CHECK: Initial test OK
 
-  call_memcpy(&memcpy, buff2, buff1, 6);
+  memcpy(buff2, buff1, 6);
 // CHECK: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: WRITE of size 6 at [[ADDR]] thread T0
 // CHECK-NEXT:  __asan_memcpy
-// CHECK-NEXT:  call_memcpy
-// CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy.cc:[[@LINE-5]]
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy.cc:[[@LINE-4]]
 // CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
 // CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy.cc
 // CHECK: 'buff2' <== Memory access at offset {{.*}} overflows this variable
+  return 0;
 }
diff --git a/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc b/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
new file mode 100644
index 0000000..0c4bfba
--- /dev/null
+++ b/test/asan/TestCases/Windows/dll_intercept_memcpy_indirect.cc
@@ -0,0 +1,34 @@
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: not %run %t %t.dll 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <string.h>
+
+void call_memcpy(void* (*f)(void *, const void *, size_t),
+                 void *a, const void *b, size_t c) {
+  f(a, b, c);
+}
+
+extern "C" __declspec(dllexport)
+int test_function() {
+  char buff1[6] = "Hello", buff2[5];
+
+  call_memcpy(&memcpy, buff2, buff1, 5);
+  if (buff1[2] != buff2[2])
+    return 2;
+  printf("Initial test OK\n");
+  fflush(0);
+// CHECK: Initial test OK
+
+  call_memcpy(&memcpy, buff2, buff1, 6);
+// CHECK: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+// CHECK: WRITE of size 6 at [[ADDR]] thread T0
+// CHECK-NEXT:  __asan_memcpy
+// CHECK-NEXT:  call_memcpy
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy_indirect.cc:[[@LINE-5]]
+// CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memcpy_indirect.cc
+// CHECK: 'buff2' <== Memory access at offset {{.*}} overflows this variable
+  return 0;
+}
diff --git a/test/asan/TestCases/Windows/dll_intercept_memset.cc b/test/asan/TestCases/Windows/dll_intercept_memset.cc
new file mode 100644
index 0000000..d4be376
--- /dev/null
+++ b/test/asan/TestCases/Windows/dll_intercept_memset.cc
@@ -0,0 +1,32 @@
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
+// RUN: not %run %t %t.dll 2>&1 | FileCheck %s
+
+// Test that it works correctly even with ICF enabled.
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll -link /OPT:REF /OPT:ICF
+// RUN: not %run %t %t.dll 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <string.h>
+
+extern "C" __declspec(dllexport)
+int test_function() {
+  char buff[5] = "aaaa";
+
+  memset(buff, 'b', 5);
+  if (buff[2] != 'b')
+    return 2;
+  printf("Initial test OK\n");
+  fflush(0);
+// CHECK: Initial test OK
+
+  memset(buff, 'c', 6);
+// CHECK: AddressSanitizer: stack-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
+// CHECK: WRITE of size 6 at [[ADDR]] thread T0
+// CHECK-NEXT:  __asan_memset
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memset.cc:[[@LINE-4]]
+// CHECK: Address [[ADDR]] is located in stack of thread T0 at offset {{.*}} in frame
+// CHECK-NEXT:  test_function {{.*}}dll_intercept_memset.cc
+// CHECK: 'buff' <== Memory access at offset {{.*}} overflows this variable
+  return 0;
+}
diff --git a/test/asan/TestCases/Windows/dll_intercept_strlen.cc b/test/asan/TestCases/Windows/dll_intercept_strlen.cc
index caac426..f41d478 100644
--- a/test/asan/TestCases/Windows/dll_intercept_strlen.cc
+++ b/test/asan/TestCases/Windows/dll_intercept_strlen.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/dll_malloc_left_oob.cc b/test/asan/TestCases/Windows/dll_malloc_left_oob.cc
index 8064b83..0653ea4 100644
--- a/test/asan/TestCases/Windows/dll_malloc_left_oob.cc
+++ b/test/asan/TestCases/Windows/dll_malloc_left_oob.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/dll_malloc_uaf.cc b/test/asan/TestCases/Windows/dll_malloc_uaf.cc
index 5842999..b286380 100644
--- a/test/asan/TestCases/Windows/dll_malloc_uaf.cc
+++ b/test/asan/TestCases/Windows/dll_malloc_uaf.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/dll_noreturn.cc b/test/asan/TestCases/Windows/dll_noreturn.cc
index 4b5bf33..6ec9072 100644
--- a/test/asan/TestCases/Windows/dll_noreturn.cc
+++ b/test/asan/TestCases/Windows/dll_noreturn.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <process.h>
diff --git a/test/asan/TestCases/Windows/dll_operator_array_new_left_oob.cc b/test/asan/TestCases/Windows/dll_operator_array_new_left_oob.cc
index 509e0af..e52345e 100644
--- a/test/asan/TestCases/Windows/dll_operator_array_new_left_oob.cc
+++ b/test/asan/TestCases/Windows/dll_operator_array_new_left_oob.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 extern "C" __declspec(dllexport)
diff --git a/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc b/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc
index 09302c3..c61d4eb 100644
--- a/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc
+++ b/test/asan/TestCases/Windows/dll_operator_array_new_with_dtor_left_oob.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 struct C {
diff --git a/test/asan/TestCases/Windows/dll_poison_unpoison.cc b/test/asan/TestCases/Windows/dll_poison_unpoison.cc
index 24f98dd..d486cb1 100644
--- a/test/asan/TestCases/Windows/dll_poison_unpoison.cc
+++ b/test/asan/TestCases/Windows/dll_poison_unpoison.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <sanitizer/asan_interface.h>
diff --git a/test/asan/TestCases/Windows/dll_stack_use_after_return.cc b/test/asan/TestCases/Windows/dll_stack_use_after_return.cc
index 22d9970..6cd74c2 100644
--- a/test/asan/TestCases/Windows/dll_stack_use_after_return.cc
+++ b/test/asan/TestCases/Windows/dll_stack_use_after_return.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=1 not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc b/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc
index a84eeb9..8f53623 100644
--- a/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc
+++ b/test/asan/TestCases/Windows/dll_thread_stack_array_left_oob.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_asan -O0 %p/dll_host.cc -Fe%t
-// RUN: %clangxx_asan -LD -O0 %s -Fe%t.dll
+// RUN: %clang_cl_asan -O0 %p/dll_host.cc -Fe%t
+// RUN: %clang_cl_asan -LD -O0 %s -Fe%t.dll
 // RUN: not %run %t %t.dll 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/double_free.cc b/test/asan/TestCases/Windows/double_free.cc
index 6745c59..18a9fcb 100644
--- a/test/asan/TestCases/Windows/double_free.cc
+++ b/test/asan/TestCases/Windows/double_free.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/double_operator_delete.cc b/test/asan/TestCases/Windows/double_operator_delete.cc
index 1cd7080..55a6d09 100644
--- a/test/asan/TestCases/Windows/double_operator_delete.cc
+++ b/test/asan/TestCases/Windows/double_operator_delete.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/global_const_string.cc b/test/asan/TestCases/Windows/global_const_string.cc
index 6d17b24..8c147c9 100644
--- a/test/asan/TestCases/Windows/global_const_string.cc
+++ b/test/asan/TestCases/Windows/global_const_string.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/global_const_string_oob.cc b/test/asan/TestCases/Windows/global_const_string_oob.cc
index b54a2bb..b39e3db 100644
--- a/test/asan/TestCases/Windows/global_const_string_oob.cc
+++ b/test/asan/TestCases/Windows/global_const_string_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
@@ -13,7 +13,7 @@
 // CHECK: AddressSanitizer: global-buffer-overflow on address [[ADDR:0x[0-9a-f]+]]
 // CHECK: READ of size 1 at [[ADDR]] thread T0
 // CHECK-NEXT:   {{#0 .* main .*global_const_string_oob.cc:}}[[@LINE-5]]
-// CHECK: [[ADDR]] is located 5 bytes to the right of global variable [[STR:.*]] from {{'.*global_const_string_oob.cc' .*}} of size 11
+// CHECK: [[ADDR]] is located 5 bytes to the right of global variable [[STR:.*]] defined in {{'.*global_const_string_oob.cc:7:.*' .*}} of size 11
 // CHECK:   [[STR]] is ascii string 'foobarspam'
   return 0;
 }
diff --git a/test/asan/TestCases/Windows/hello_world.cc b/test/asan/TestCases/Windows/hello_world.cc
index 2ef37d0..400ca1b 100644
--- a/test/asan/TestCases/Windows/hello_world.cc
+++ b/test/asan/TestCases/Windows/hello_world.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/intercept_memcpy.cc b/test/asan/TestCases/Windows/intercept_memcpy.cc
index 73ce951..89eb175 100644
--- a/test/asan/TestCases/Windows/intercept_memcpy.cc
+++ b/test/asan/TestCases/Windows/intercept_memcpy.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/intercept_strdup.cc b/test/asan/TestCases/Windows/intercept_strdup.cc
index 1bc235f..0a40d86 100644
--- a/test/asan/TestCases/Windows/intercept_strdup.cc
+++ b/test/asan/TestCases/Windows/intercept_strdup.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/intercept_strlen.cc b/test/asan/TestCases/Windows/intercept_strlen.cc
index 62a5be8..928a286 100644
--- a/test/asan/TestCases/Windows/intercept_strlen.cc
+++ b/test/asan/TestCases/Windows/intercept_strlen.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/malloc_left_oob.cc b/test/asan/TestCases/Windows/malloc_left_oob.cc
index 2f6516e..ec133c3 100644
--- a/test/asan/TestCases/Windows/malloc_left_oob.cc
+++ b/test/asan/TestCases/Windows/malloc_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/malloc_right_oob.cc b/test/asan/TestCases/Windows/malloc_right_oob.cc
index ba1bf93..9975316 100644
--- a/test/asan/TestCases/Windows/malloc_right_oob.cc
+++ b/test/asan/TestCases/Windows/malloc_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/malloc_uaf.cc b/test/asan/TestCases/Windows/malloc_uaf.cc
index 3f873cc..f584789 100644
--- a/test/asan/TestCases/Windows/malloc_uaf.cc
+++ b/test/asan/TestCases/Windows/malloc_uaf.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/operator_array_new_left_oob.cc b/test/asan/TestCases/Windows/operator_array_new_left_oob.cc
index 082cf4c..81b709f 100644
--- a/test/asan/TestCases/Windows/operator_array_new_left_oob.cc
+++ b/test/asan/TestCases/Windows/operator_array_new_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 int main() {
diff --git a/test/asan/TestCases/Windows/operator_array_new_right_oob.cc b/test/asan/TestCases/Windows/operator_array_new_right_oob.cc
index 1465fa0..079c78e 100644
--- a/test/asan/TestCases/Windows/operator_array_new_right_oob.cc
+++ b/test/asan/TestCases/Windows/operator_array_new_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/operator_array_new_uaf.cc b/test/asan/TestCases/Windows/operator_array_new_uaf.cc
index f48d7a6..1817996 100644
--- a/test/asan/TestCases/Windows/operator_array_new_uaf.cc
+++ b/test/asan/TestCases/Windows/operator_array_new_uaf.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc b/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc
index 1702b51..c5bdba5 100644
--- a/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc
+++ b/test/asan/TestCases/Windows/operator_array_new_with_dtor_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 struct C {
diff --git a/test/asan/TestCases/Windows/operator_delete_wrong_argument.cc b/test/asan/TestCases/Windows/operator_delete_wrong_argument.cc
index 7358cba..c3e7dac 100644
--- a/test/asan/TestCases/Windows/operator_delete_wrong_argument.cc
+++ b/test/asan/TestCases/Windows/operator_delete_wrong_argument.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/operator_new_left_oob.cc b/test/asan/TestCases/Windows/operator_new_left_oob.cc
index 77454fa..c077f11 100644
--- a/test/asan/TestCases/Windows/operator_new_left_oob.cc
+++ b/test/asan/TestCases/Windows/operator_new_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/operator_new_right_oob.cc b/test/asan/TestCases/Windows/operator_new_right_oob.cc
index e6df9c9..7a66d17 100644
--- a/test/asan/TestCases/Windows/operator_new_right_oob.cc
+++ b/test/asan/TestCases/Windows/operator_new_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/operator_new_uaf.cc b/test/asan/TestCases/Windows/operator_new_uaf.cc
index 350598a..c435458 100644
--- a/test/asan/TestCases/Windows/operator_new_uaf.cc
+++ b/test/asan/TestCases/Windows/operator_new_uaf.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/realloc_left_oob.cc b/test/asan/TestCases/Windows/realloc_left_oob.cc
index 511e509..7d30e1d 100644
--- a/test/asan/TestCases/Windows/realloc_left_oob.cc
+++ b/test/asan/TestCases/Windows/realloc_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/realloc_right_oob.cc b/test/asan/TestCases/Windows/realloc_right_oob.cc
index 3b0ad19..f741390 100644
--- a/test/asan/TestCases/Windows/realloc_right_oob.cc
+++ b/test/asan/TestCases/Windows/realloc_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/realloc_uaf.cc b/test/asan/TestCases/Windows/realloc_uaf.cc
index 45c5598..c5b6953 100644
--- a/test/asan/TestCases/Windows/realloc_uaf.cc
+++ b/test/asan/TestCases/Windows/realloc_uaf.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/stack_array_left_oob.cc b/test/asan/TestCases/Windows/stack_array_left_oob.cc
index 65385e2..040d855 100644
--- a/test/asan/TestCases/Windows/stack_array_left_oob.cc
+++ b/test/asan/TestCases/Windows/stack_array_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/stack_array_right_oob.cc b/test/asan/TestCases/Windows/stack_array_right_oob.cc
index ac267bf..a370246 100644
--- a/test/asan/TestCases/Windows/stack_array_right_oob.cc
+++ b/test/asan/TestCases/Windows/stack_array_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/stack_array_sanity.cc b/test/asan/TestCases/Windows/stack_array_sanity.cc
index 7dc75f8..1aef1a9 100644
--- a/test/asan/TestCases/Windows/stack_array_sanity.cc
+++ b/test/asan/TestCases/Windows/stack_array_sanity.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t | FileCheck %s
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/Windows/stack_use_after_return.cc b/test/asan/TestCases/Windows/stack_use_after_return.cc
index 1eb6442..7955f26 100644
--- a/test/asan/TestCases/Windows/stack_use_after_return.cc
+++ b/test/asan/TestCases/Windows/stack_use_after_return.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=1 not %run %t 2>&1 | FileCheck %s
 
 char *x;
diff --git a/test/asan/TestCases/Windows/thread_simple.cc b/test/asan/TestCases/Windows/thread_simple.cc
index 6afb0bf..14bb82f 100644
--- a/test/asan/TestCases/Windows/thread_simple.cc
+++ b/test/asan/TestCases/Windows/thread_simple.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/thread_stack_array_left_oob.cc b/test/asan/TestCases/Windows/thread_stack_array_left_oob.cc
index 30e8ce0..17b9b1b 100644
--- a/test/asan/TestCases/Windows/thread_stack_array_left_oob.cc
+++ b/test/asan/TestCases/Windows/thread_stack_array_left_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/thread_stack_array_right_oob.cc b/test/asan/TestCases/Windows/thread_stack_array_right_oob.cc
index 365288d..601a1b8 100644
--- a/test/asan/TestCases/Windows/thread_stack_array_right_oob.cc
+++ b/test/asan/TestCases/Windows/thread_stack_array_right_oob.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/thread_stack_reuse.cc b/test/asan/TestCases/Windows/thread_stack_reuse.cc
index 49611d9..7da3a80 100644
--- a/test/asan/TestCases/Windows/thread_stack_reuse.cc
+++ b/test/asan/TestCases/Windows/thread_stack_reuse.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/thread_stress.cc b/test/asan/TestCases/Windows/thread_stress.cc
index 3a0d9c5..74be8d8 100644
--- a/test/asan/TestCases/Windows/thread_stress.cc
+++ b/test/asan/TestCases/Windows/thread_stress.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 #include <windows.h>
diff --git a/test/asan/TestCases/Windows/use_after_realloc.cc b/test/asan/TestCases/Windows/use_after_realloc.cc
index 6bd722b..9d2c025 100644
--- a/test/asan/TestCases/Windows/use_after_realloc.cc
+++ b/test/asan/TestCases/Windows/use_after_realloc.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 #include <malloc.h>
diff --git a/test/asan/TestCases/Windows/windows_h.cc b/test/asan/TestCases/Windows/windows_h.cc
index c221185..40cf5a1 100644
--- a/test/asan/TestCases/Windows/windows_h.cc
+++ b/test/asan/TestCases/Windows/windows_h.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: %run %t
 
 // Just make sure we can parse <windows.h>
diff --git a/test/asan/TestCases/Windows/wrong_downcast_on_heap.cc b/test/asan/TestCases/Windows/wrong_downcast_on_heap.cc
index 0f43a6a..112dd53 100644
--- a/test/asan/TestCases/Windows/wrong_downcast_on_heap.cc
+++ b/test/asan/TestCases/Windows/wrong_downcast_on_heap.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 class Parent {
diff --git a/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc b/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc
index 02c9b9f..2859ecc 100644
--- a/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc
+++ b/test/asan/TestCases/Windows/wrong_downcast_on_stack.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_asan -O0 %s -Fe%t
+// RUN: %clang_cl_asan -O0 %s -Fe%t
 // RUN: not %run %t 2>&1 | FileCheck %s
 
 class Parent {
diff --git a/test/asan/TestCases/current_allocated_bytes.cc b/test/asan/TestCases/current_allocated_bytes.cc
index a9cd159..c49e433 100644
--- a/test/asan/TestCases/current_allocated_bytes.cc
+++ b/test/asan/TestCases/current_allocated_bytes.cc
@@ -1,9 +1,10 @@
-// RUN: %clangxx_asan -O0 %s -lpthread -o %t && %run %t
-// RUN: %clangxx_asan -O2 %s -lpthread -o %t && %run %t
+// RUN: %clangxx_asan -O0 %s -pthread -o %t && %run %t
+// RUN: %clangxx_asan -O2 %s -pthread -o %t && %run %t
+// REQUIRES: stable-runtime
 
 #include <assert.h>
 #include <pthread.h>
-#include <sanitizer/asan_interface.h>
+#include <sanitizer/allocator_interface.h>
 #include <stdio.h>
 #include <stdlib.h>
 
@@ -16,12 +17,12 @@
 }
 
 void* check_stats(void *arg) {
-  assert(__asan_get_current_allocated_bytes() > 0);
+  assert(__sanitizer_get_current_allocated_bytes() > 0);
   return 0;
 }
 
 int main() {
-  size_t used_mem = __asan_get_current_allocated_bytes();
+  size_t used_mem = __sanitizer_get_current_allocated_bytes();
   printf("Before: %zu\n", used_mem);
   const int kNumIterations = 1000;
   for (int iter = 0; iter < kNumIterations; iter++) {
@@ -32,7 +33,7 @@
     }
     for (int j = 0; j < 4; j++)
       assert(0 == pthread_join(thr[j], 0));
-    used_mem = __asan_get_current_allocated_bytes();
+    used_mem = __sanitizer_get_current_allocated_bytes();
     if (used_mem > kLargeAlloc) {
       printf("After iteration %d: %zu\n", iter, used_mem);
       return 1;
diff --git a/test/asan/TestCases/deep_stack_uaf.cc b/test/asan/TestCases/deep_stack_uaf.cc
index 9ce27d5..accb70c 100644
--- a/test/asan/TestCases/deep_stack_uaf.cc
+++ b/test/asan/TestCases/deep_stack_uaf.cc
@@ -2,6 +2,7 @@
 
 // RUN: %clangxx_asan -O0 %s -o %t 2>&1
 // RUN: env ASAN_OPTIONS=malloc_context_size=120:redzone=512 not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 #include <stdlib.h>
 #include <stdio.h>
 
diff --git a/test/asan/TestCases/deep_thread_stack.cc b/test/asan/TestCases/deep_thread_stack.cc
index 1e802b0..535da79 100644
--- a/test/asan/TestCases/deep_thread_stack.cc
+++ b/test/asan/TestCases/deep_thread_stack.cc
@@ -1,7 +1,8 @@
-// RUN: %clangxx_asan -O0 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O1 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O2 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O1 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O2 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// REQUIRES: stable-runtime
 
 #include <pthread.h>
 
diff --git a/test/asan/TestCases/default_blacklist.cc b/test/asan/TestCases/default_blacklist.cc
index 25a1ae1..9358cc4 100644
--- a/test/asan/TestCases/default_blacklist.cc
+++ b/test/asan/TestCases/default_blacklist.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Test that ASan uses the default blacklist from resource directory.
 // RUN: %clangxx_asan -### %s 2>&1 | FileCheck %s
 // CHECK: fsanitize-blacklist={{.*}}asan_blacklist.txt
diff --git a/test/asan/TestCases/default_options.cc b/test/asan/TestCases/default_options.cc
index c77dd49..6453f66 100644
--- a/test/asan/TestCases/default_options.cc
+++ b/test/asan/TestCases/default_options.cc
@@ -1,6 +1,9 @@
 // RUN: %clangxx_asan -O2 %s -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
 
+// __asan_default_options() is not supported on Windows.
+// XFAIL: win32
+
 const char *kAsanDefaultOptions="verbosity=1 foo=bar";
 
 extern "C"
diff --git a/test/asan/TestCases/double-free.cc b/test/asan/TestCases/double-free.cc
index 6bbca24..212d7ea 100644
--- a/test/asan/TestCases/double-free.cc
+++ b/test/asan/TestCases/double-free.cc
@@ -4,6 +4,7 @@
 // Also works if no malloc context is available.
 // RUN: env ASAN_OPTIONS=malloc_context_size=0:fast_unwind_on_malloc=0 not %run %t 2>&1 | FileCheck %s
 // RUN: env ASAN_OPTIONS=malloc_context_size=0:fast_unwind_on_malloc=1 not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/free_hook_realloc.cc b/test/asan/TestCases/free_hook_realloc.cc
index 2040cad..4b27532 100644
--- a/test/asan/TestCases/free_hook_realloc.cc
+++ b/test/asan/TestCases/free_hook_realloc.cc
@@ -1,13 +1,18 @@
 // Check that free hook doesn't conflict with Realloc.
 // RUN: %clangxx_asan -O2 %s -o %t
 // RUN: %run %t 2>&1 | FileCheck %s
+
+// Malloc/free hooks are not supported on Windows.
+// XFAIL: win32
+
 #include <stdlib.h>
 #include <unistd.h>
+#include <sanitizer/allocator_interface.h>
 
 static void *glob_ptr;
 
 extern "C" {
-void __asan_free_hook(void *ptr) {
+void __sanitizer_free_hook(const volatile void *ptr) {
   if (ptr == glob_ptr) {
     *(int*)ptr = 0;
     write(1, "FreeHook\n", sizeof("FreeHook\n"));
diff --git a/test/asan/TestCases/frexp_interceptor.cc b/test/asan/TestCases/frexp_interceptor.cc
new file mode 100644
index 0000000..d75ba99
--- /dev/null
+++ b/test/asan/TestCases/frexp_interceptor.cc
@@ -0,0 +1,16 @@
+// RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
+
+// Test the frexp() interceptor.
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+int main() {
+  double x = 3.14;
+  int *exp = (int*)malloc(sizeof(int));
+  free(exp);  // exp is dangling from here on
+  double y = frexp(x, exp);  // writes the exponent via the freed pointer
+  // CHECK: use-after-free
+  // CHECK: SUMMARY
+  return 0;
+}
diff --git a/test/asan/TestCases/gc-test.cc b/test/asan/TestCases/gc-test.cc
index b15e668..ffbea85 100644
--- a/test/asan/TestCases/gc-test.cc
+++ b/test/asan/TestCases/gc-test.cc
@@ -1,6 +1,7 @@
-// RUN: %clangxx_asan %s -lpthread -o %t
+// RUN: %clangxx_asan %s -pthread -o %t
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK1
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=0 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK0
+// REQUIRES: stable-runtime
 
 #include <assert.h>
 #include <stdio.h>
diff --git a/test/asan/TestCases/global-demangle.cc b/test/asan/TestCases/global-demangle.cc
index 5f7ff91..2bfa0d1 100644
--- a/test/asan/TestCases/global-demangle.cc
+++ b/test/asan/TestCases/global-demangle.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=264
+// XFAIL: android
+//
 // RUN: %clangxx_asan -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
 namespace XXX {
diff --git a/test/asan/TestCases/global-location.cc b/test/asan/TestCases/global-location.cc
new file mode 100644
index 0000000..54f2055
--- /dev/null
+++ b/test/asan/TestCases/global-location.cc
@@ -0,0 +1,41 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=264
+// XFAIL: android
+
+// RUN: %clangxx_asan -O2 %s -o %t
+// RUN: not %run %t g 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=GLOB
+// RUN: not %run %t c 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CLASS_STATIC
+// RUN: not %run %t f 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=FUNC_STATIC
+// RUN: not %run %t l 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LITERAL
+
+// CHECK: AddressSanitizer: global-buffer-overflow
+
+#include <string.h>
+
+struct C {
+  static int array[10];
+};
+
+int global[10];
+// GLOB: 0x{{.*}} is located 4 bytes to the right of global variable 'global' defined in '{{.*}}global-location.cc:[[@LINE-1]]:5' {{.*}} of size 40
+int C::array[10];
+// CLASS_STATIC: 0x{{.*}} is located 4 bytes to the right of global variable 'C::array' defined in '{{.*}}global-location.cc:[[@LINE-1]]:8' {{.*}} of size 40
+
+int main(int argc, char **argv) {
+  int one = argc - 1;  // 1 when run with a single argument, as the RUN lines do
+  switch (argv[1][0]) {  // first character of the argument selects the access
+  case 'g': return global[one * 11];  // index 11 of a 10-element int array
+  case 'c': return C::array[one * 11];  // same overflow on the class static
+  case 'f':
+    static int array[10];
+    // FUNC_STATIC: 0x{{.*}} is located 4 bytes to the right of global variable 'main::array' defined in '{{.*}}global-location.cc:[[@LINE-1]]:16' {{.*}} of size 40
+    memset(array, 0, 10);
+    return array[one * 11];  // same overflow on the function-local static
+  case 'l':
+    const char *str = "0123456789";
+    // LITERAL: 0x{{.*}} is located 0 bytes to the right of global variable {{.*}} defined in '{{.*}}global-location.cc:[[@LINE-1]]:23' {{.*}} of size 11
+    return str[one * 11];  // one byte past the 11-byte literal (incl. NUL)
+  }
+  return 0;
+}
+
+// CHECK: SUMMARY: AddressSanitizer: global-buffer-overflow
diff --git a/test/asan/TestCases/init-order-atexit.cc b/test/asan/TestCases/init-order-atexit.cc
index c4defaf..e0dac32 100644
--- a/test/asan/TestCases/init-order-atexit.cc
+++ b/test/asan/TestCases/init-order-atexit.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Test for the following situation:
 // (1) global A is constructed.
 // (2) exit() is called during construction of global B.
diff --git a/test/asan/TestCases/init-order-pthread-create.cc b/test/asan/TestCases/init-order-pthread-create.cc
index e333436..eeff308 100644
--- a/test/asan/TestCases/init-order-pthread-create.cc
+++ b/test/asan/TestCases/init-order-pthread-create.cc
@@ -1,7 +1,7 @@
 // Check that init-order checking is properly disabled if pthread_create is
 // called.
 
-// RUN: %clangxx_asan %s %p/Helpers/init-order-pthread-create-extra.cc -lpthread -o %t
+// RUN: %clangxx_asan %s %p/Helpers/init-order-pthread-create-extra.cc -pthread -o %t
 // RUN: env ASAN_OPTIONS=strict_init_order=true %run %t
 
 #include <stdio.h>
diff --git a/test/asan/TestCases/initialization-blacklist.cc b/test/asan/TestCases/initialization-blacklist.cc
index 42c78fa..8ea6b46 100644
--- a/test/asan/TestCases/initialization-blacklist.cc
+++ b/test/asan/TestCases/initialization-blacklist.cc
@@ -2,18 +2,15 @@
 
 // RUN: %clangxx_asan -O0 %s %p/Helpers/initialization-blacklist-extra.cc\
 // RUN:   %p/Helpers/initialization-blacklist-extra2.cc \
-// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
-// RUN:   -fsanitize=init-order -o %t
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
 // RUN: %clangxx_asan -O1 %s %p/Helpers/initialization-blacklist-extra.cc\
 // RUN:   %p/Helpers/initialization-blacklist-extra2.cc \
-// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
-// RUN:   -fsanitize=init-order -o %t
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
 // RUN: %clangxx_asan -O2 %s %p/Helpers/initialization-blacklist-extra.cc\
 // RUN:   %p/Helpers/initialization-blacklist-extra2.cc \
-// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt \
-// RUN:   -fsanitize=init-order -o %t
+// RUN:   -fsanitize-blacklist=%p/Helpers/initialization-blacklist.txt -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
 
 // Function is defined in another TU.
diff --git a/test/asan/TestCases/initialization-constexpr.cc b/test/asan/TestCases/initialization-constexpr.cc
index f964888..6442461 100644
--- a/test/asan/TestCases/initialization-constexpr.cc
+++ b/test/asan/TestCases/initialization-constexpr.cc
@@ -4,17 +4,13 @@
 // constructor implies that it was initialized during constant initialization,
 // not dynamic initialization).
 
-// RUN: %clangxx_asan -O0 %s %p/Helpers/initialization-constexpr-extra.cc\
-// RUN:   --std=c++11 -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O0 %s %p/Helpers/initialization-constexpr-extra.cc --std=c++11 -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
-// RUN: %clangxx_asan -O1 %s %p/Helpers/initialization-constexpr-extra.cc\
-// RUN:   --std=c++11 -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O1 %s %p/Helpers/initialization-constexpr-extra.cc --std=c++11 -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
-// RUN: %clangxx_asan -O2 %s %p/Helpers/initialization-constexpr-extra.cc\
-// RUN:   --std=c++11 -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O2 %s %p/Helpers/initialization-constexpr-extra.cc --std=c++11 -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
-// RUN: %clangxx_asan -O3 %s %p/Helpers/initialization-constexpr-extra.cc\
-// RUN:   --std=c++11 -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O3 %s %p/Helpers/initialization-constexpr-extra.cc --std=c++11 -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
 
 class Integer {
diff --git a/test/asan/TestCases/initialization-nobug.cc b/test/asan/TestCases/initialization-nobug.cc
index 87c5dc8..1249deb 100644
--- a/test/asan/TestCases/initialization-nobug.cc
+++ b/test/asan/TestCases/initialization-nobug.cc
@@ -1,13 +1,13 @@
 // A collection of various initializers which shouldn't trip up initialization
 // order checking.  If successful, this will just return 0.
 
-// RUN: %clangxx_asan -O0 %s %p/Helpers/initialization-nobug-extra.cc -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O0 %s %p/Helpers/initialization-nobug-extra.cc -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
-// RUN: %clangxx_asan -O1 %s %p/Helpers/initialization-nobug-extra.cc -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O1 %s %p/Helpers/initialization-nobug-extra.cc -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
-// RUN: %clangxx_asan -O2 %s %p/Helpers/initialization-nobug-extra.cc -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O2 %s %p/Helpers/initialization-nobug-extra.cc -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
-// RUN: %clangxx_asan -O3 %s %p/Helpers/initialization-nobug-extra.cc -fsanitize=init-order -o %t
+// RUN: %clangxx_asan -O3 %s %p/Helpers/initialization-nobug-extra.cc -o %t
 // RUN: env ASAN_OPTIONS=check_initialization_order=true %run %t 2>&1
 
 // Simple access:
diff --git a/test/asan/TestCases/invalid-free.cc b/test/asan/TestCases/invalid-free.cc
index fdfec73..34018fb 100644
--- a/test/asan/TestCases/invalid-free.cc
+++ b/test/asan/TestCases/invalid-free.cc
@@ -4,6 +4,7 @@
 // Also works if no malloc context is available.
 // RUN: env ASAN_OPTIONS=malloc_context_size=0:fast_unwind_on_malloc=0 not %run %t 2>&1 | FileCheck %s
 // RUN: env ASAN_OPTIONS=malloc_context_size=0:fast_unwind_on_malloc=1 not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/large_func_test.cc b/test/asan/TestCases/large_func_test.cc
index bdd6f8f..0d651f6 100644
--- a/test/asan/TestCases/large_func_test.cc
+++ b/test/asan/TestCases/large_func_test.cc
@@ -2,6 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
+// XFAIL: arm-linux-gnueabi
 
 #include <stdlib.h>
 __attribute__((noinline))
diff --git a/test/asan/TestCases/log-path_test.cc b/test/asan/TestCases/log-path_test.cc
index cac0c5f..5a1d072 100644
--- a/test/asan/TestCases/log-path_test.cc
+++ b/test/asan/TestCases/log-path_test.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // RUN: %clangxx_asan  %s -o %t
 
 // Regular run.
@@ -23,6 +26,8 @@
 // RUN: env ASAN_OPTIONS=log_path=%t.log  %run %t ARG ARG ARG
 // RUN: not cat %t.log.*
 
+// FIXME: log_path is not supported on Windows yet.
+// XFAIL: win32
 
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/lsan_annotations.cc b/test/asan/TestCases/lsan_annotations.cc
index f52b0ff..84c2878 100644
--- a/test/asan/TestCases/lsan_annotations.cc
+++ b/test/asan/TestCases/lsan_annotations.cc
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Check that LSan annotations work fine.
 // RUN: %clangxx_asan -O0 %s -o %t && %run %t
 // RUN: %clangxx_asan -O3 %s -o %t && %run %t
diff --git a/test/asan/TestCases/malloc_context_size.cc b/test/asan/TestCases/malloc_context_size.cc
index 9f7ba40..fb158c6 100644
--- a/test/asan/TestCases/malloc_context_size.cc
+++ b/test/asan/TestCases/malloc_context_size.cc
@@ -16,12 +16,16 @@
   // CHECK-Linux-NEXT: #0 0x{{.*}} in operator delete[]
   // CHECK-Darwin: freed by thread T{{.*}} here:
   // CHECK-Darwin-NEXT: #0 0x{{.*}} in wrap__ZdaPv
+  // CHECK-Windows: freed by thread T{{.*}} here:
+  // CHECK-Windows-NEXT: #0 0x{{.*}} in operator delete[]
   // CHECK-NOT: #1 0x{{.*}}
 
   // CHECK-Linux: previously allocated by thread T{{.*}} here:
   // CHECK-Linux-NEXT: #0 0x{{.*}} in operator new[]
   // CHECK-Darwin: previously allocated by thread T{{.*}} here:
   // CHECK-Darwin-NEXT: #0 0x{{.*}} in wrap__Znam
+  // CHECK-Windows: previously allocated by thread T{{.*}} here:
+  // CHECK-Windows-NEXT: #0 0x{{.*}} in operator new[]
   // CHECK-NOT: #1 0x{{.*}}
 
   // CHECK: SUMMARY: AddressSanitizer: heap-use-after-free
diff --git a/test/asan/TestCases/malloc_hook.cc b/test/asan/TestCases/malloc_hook.cc
deleted file mode 100644
index c535ef8..0000000
--- a/test/asan/TestCases/malloc_hook.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-// RUN: %clangxx_asan -O2 %s -o %t
-// RUN: %run %t 2>&1 | FileCheck %s
-#include <stdlib.h>
-#include <unistd.h>
-
-extern "C" {
-bool __asan_get_ownership(const void *p);
-
-void *global_ptr;
-
-// Note: avoid calling functions that allocate memory in malloc/free
-// to avoid infinite recursion.
-void __asan_malloc_hook(void *ptr, size_t sz) {
-  if (__asan_get_ownership(ptr)) {
-    write(1, "MallocHook\n", sizeof("MallocHook\n"));
-    global_ptr = ptr;
-  }
-}
-void __asan_free_hook(void *ptr) {
-  if (__asan_get_ownership(ptr) && ptr == global_ptr)
-    write(1, "FreeHook\n", sizeof("FreeHook\n"));
-}
-}  // extern "C"
-
-int main() {
-  volatile int *x = new int;
-  // CHECK: MallocHook
-  // Check that malloc hook was called with correct argument.
-  if (global_ptr != (void*)x) {
-    _exit(1);
-  }
-  *x = 0;
-  delete x;
-  // CHECK: FreeHook
-  return 0;
-}
diff --git a/test/asan/TestCases/max_redzone.cc b/test/asan/TestCases/max_redzone.cc
index f2c0dee..01c25a9 100644
--- a/test/asan/TestCases/max_redzone.cc
+++ b/test/asan/TestCases/max_redzone.cc
@@ -8,17 +8,17 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <sanitizer/asan_interface.h>
+#include <sanitizer/allocator_interface.h>
 
 int main(int argc, char **argv) {
   if (argc < 2)
     return 1;
   bool large_redzone = atoi(argv[1]);
-  size_t before = __asan_get_heap_size();
+  size_t before = __sanitizer_get_heap_size();
   void *pp[10000];
   for (int i = 0; i < 10000; ++i)
     pp[i] = malloc(4096 - 64);
-  size_t after = __asan_get_heap_size();
+  size_t after = __sanitizer_get_heap_size();
   for (int i = 0; i < 10000; ++i)
     free(pp[i]);
   size_t diff = after - before;
diff --git a/test/asan/TestCases/mmap_limit_mb.cc b/test/asan/TestCases/mmap_limit_mb.cc
index 5fc0295..1d697ef 100644
--- a/test/asan/TestCases/mmap_limit_mb.cc
+++ b/test/asan/TestCases/mmap_limit_mb.cc
@@ -7,6 +7,7 @@
 // RUN: env ASAN_OPTIONS=mmap_limit_mb=500 %run %t 50 1000000
 // RUN: env ASAN_OPTIONS=mmap_limit_mb=500 not %run %t 500 16 2>&1 | FileCheck %s
 // RUN: env ASAN_OPTIONS=mmap_limit_mb=500 not %run %t 500 1000000 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <assert.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/no_asan_gen_globals.c b/test/asan/TestCases/no_asan_gen_globals.c
index a747d7a..0a383da 100644
--- a/test/asan/TestCases/no_asan_gen_globals.c
+++ b/test/asan/TestCases/no_asan_gen_globals.c
@@ -1,3 +1,6 @@
+// FIXME: https://code.google.com/p/address-sanitizer/issues/detail?id=316
+// XFAIL: android
+//
 // Make sure __asan_gen_* strings do not end up in the symbol table.
 
 // RUN: %clang_asan %s -o %t.exe
diff --git a/test/asan/TestCases/on_error_callback.cc b/test/asan/TestCases/on_error_callback.cc
index 0ad83d5..c378c8b 100644
--- a/test/asan/TestCases/on_error_callback.cc
+++ b/test/asan/TestCases/on_error_callback.cc
@@ -1,5 +1,8 @@
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s
 
+// FIXME: __asan_on_error() is not supported on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 #include <stdlib.h>
 
diff --git a/test/asan/TestCases/printf-1.c b/test/asan/TestCases/printf-1.c
index dee00a5..5657083 100644
--- a/test/asan/TestCases/printf-1.c
+++ b/test/asan/TestCases/printf-1.c
@@ -4,6 +4,10 @@
 // RUN: %run %t 2>&1 | FileCheck %s
 
 #include <stdio.h>
+#if defined(_WIN32)
+# define snprintf _snprintf
+#endif
+
 int main() {
   volatile char c = '0';
   volatile int x = 12;
diff --git a/test/asan/TestCases/printf-2.c b/test/asan/TestCases/printf-2.c
index f12c0b7..e9cb47e 100644
--- a/test/asan/TestCases/printf-2.c
+++ b/test/asan/TestCases/printf-2.c
@@ -5,6 +5,9 @@
 // RUN: env ASAN_OPTIONS=replace_str=0:replace_intrin=0:check_printf=0 %run %t 2>&1 | FileCheck --check-prefix=CHECK-OFF %s
 // RUN: env ASAN_OPTIONS=replace_str=0:replace_intrin=0 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 
+// FIXME: printf is not intercepted on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/test/asan/TestCases/printf-3.c b/test/asan/TestCases/printf-3.c
index 387f6d5..d16833d 100644
--- a/test/asan/TestCases/printf-3.c
+++ b/test/asan/TestCases/printf-3.c
@@ -3,6 +3,9 @@
 // RUN: env ASAN_OPTIONS=check_printf=0 %run %t 2>&1 | FileCheck --check-prefix=CHECK-OFF %s
 // RUN: not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 
+// FIXME: printf is not intercepted on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 int main() {
   volatile char c = '0';
diff --git a/test/asan/TestCases/printf-4.c b/test/asan/TestCases/printf-4.c
index b219d6f..e269211 100644
--- a/test/asan/TestCases/printf-4.c
+++ b/test/asan/TestCases/printf-4.c
@@ -4,6 +4,9 @@
 // RUN: env ASAN_OPTIONS=replace_str=0:replace_intrin=0:check_printf=1 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 // RUN: env ASAN_OPTIONS=replace_str=0:replace_intrin=0 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 
+// FIXME: printf is not intercepted on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 int main() {
   volatile char c = '0';
diff --git a/test/asan/TestCases/printf-5.c b/test/asan/TestCases/printf-5.c
index 5bb43cb..ac2c1c4 100644
--- a/test/asan/TestCases/printf-5.c
+++ b/test/asan/TestCases/printf-5.c
@@ -4,6 +4,9 @@
 // RUN: env ASAN_OPTIONS=replace_intrin=0:check_printf=0 %run %t 2>&1 | FileCheck --check-prefix=CHECK-OFF %s
 // RUN: env ASAN_OPTIONS=replace_intrin=0 not %run %t 2>&1 | FileCheck --check-prefix=CHECK-ON %s
 
+// FIXME: printf is not intercepted on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 #include <string.h>
 int main() {
diff --git a/test/asan/TestCases/sanity_check_pure_c.c b/test/asan/TestCases/sanity_check_pure_c.c
index 0ec86af..01d87e7 100644
--- a/test/asan/TestCases/sanity_check_pure_c.c
+++ b/test/asan/TestCases/sanity_check_pure_c.c
@@ -5,6 +5,7 @@
 // Sanity checking a test in pure C with -pie.
 // RUN: %clang_asan -O2 %s -pie -fPIE -o %t
 // RUN: not %run %t 2>&1 | FileCheck %s
+// XFAIL: arm-linux-gnueabi
 
 #include <stdlib.h>
 int main() {
diff --git a/test/asan/TestCases/stack-overflow.cc b/test/asan/TestCases/stack-overflow.cc
index 4861fd2..234e3c7 100644
--- a/test/asan/TestCases/stack-overflow.cc
+++ b/test/asan/TestCases/stack-overflow.cc
@@ -1,19 +1,20 @@
 // Test ASan detection of stack-overflow condition.
 
-// RUN: %clangxx_asan -O0 %s -DSMALL_FRAME -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -DSMALL_FRAME -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O0 %s -DSAVE_ALL_THE_REGISTERS -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -DSAVE_ALL_THE_REGISTERS -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O0 %s -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -DSMALL_FRAME -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -DSMALL_FRAME -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -DSAVE_ALL_THE_REGISTERS -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -DSAVE_ALL_THE_REGISTERS -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
 
-// RUN: %clangxx_asan -O0 %s -DTHREAD -DSMALL_FRAME -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -DTHREAD -DSMALL_FRAME -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O0 %s -DTHREAD -DSAVE_ALL_THE_REGISTERS -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -DTHREAD -DSAVE_ALL_THE_REGISTERS -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O0 %s -DTHREAD -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan -O3 %s -DTHREAD -lpthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -DTHREAD -DSMALL_FRAME -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -DTHREAD -DSMALL_FRAME -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -DTHREAD -DSAVE_ALL_THE_REGISTERS -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -DTHREAD -DSAVE_ALL_THE_REGISTERS -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O0 %s -DTHREAD -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan -O3 %s -DTHREAD -pthread -o %t && env ASAN_OPTIONS=use_sigaltstack=1 not %run %t 2>&1 | FileCheck %s
 // RUN: not %run %t 2>&1 | FileCheck %s
+// REQUIRES: stable-runtime
 
 #include <assert.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/stack-use-after-return.cc b/test/asan/TestCases/stack-use-after-return.cc
index e31a63c..437c457 100644
--- a/test/asan/TestCases/stack-use-after-return.cc
+++ b/test/asan/TestCases/stack-use-after-return.cc
@@ -1,14 +1,14 @@
 // RUN: export ASAN_OPTIONS=detect_stack_use_after_return=1
-// RUN: %clangxx_asan  -O0 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan  -O1 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan  -O2 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_asan  -O3 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan  -O0 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan  -O1 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan  -O2 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_asan  -O3 %s -pthread -o %t && not %run %t 2>&1 | FileCheck %s
 // RUN: env ASAN_OPTIONS=detect_stack_use_after_return=0 %run %t
 // Regression test for a CHECK failure with small stack size and large frame.
-// RUN: %clangxx_asan  -O3 %s -lpthread -o %t -DkSize=10000 -DUseThread -DkStackSize=65536 && not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
+// RUN: %clangxx_asan  -O3 %s -pthread -o %t -DkSize=10000 -DUseThread -DkStackSize=65536 && not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
 //
 // Test that we can find UAR in a thread other than main:
-// RUN: %clangxx_asan  -DUseThread -O2 %s -lpthread -o %t && not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
+// RUN: %clangxx_asan  -DUseThread -O2 %s -pthread -o %t && not %run %t 2>&1 | FileCheck --check-prefix=THREAD %s
 //
 // Test the max_uar_stack_size_log/min_uar_stack_size_log flag.
 //
diff --git a/test/asan/TestCases/strdup_oob_test.cc b/test/asan/TestCases/strdup_oob_test.cc
index 7716ee5..a039568 100644
--- a/test/asan/TestCases/strdup_oob_test.cc
+++ b/test/asan/TestCases/strdup_oob_test.cc
@@ -12,8 +12,9 @@
   int x = copy[4 + argc];  // BOOM
   // CHECK: AddressSanitizer: heap-buffer-overflow
   // CHECK: #0 {{.*}}main {{.*}}strdup_oob_test.cc:[[@LINE-2]]
-  // CHECK: allocated by thread T{{.*}} here:
-  // CHECK: #0 {{.*}}strdup
+  // CHECK-LABEL: allocated by thread T{{.*}} here:
+  // CHECK: #{{[01]}} {{.*}}strdup
+  // CHECK-LABEL: SUMMARY
   // CHECK: strdup_oob_test.cc:[[@LINE-6]]
   return x;
 }
diff --git a/test/asan/TestCases/strncpy-overflow.cc b/test/asan/TestCases/strncpy-overflow.cc
index 0d70b79..8001047 100644
--- a/test/asan/TestCases/strncpy-overflow.cc
+++ b/test/asan/TestCases/strncpy-overflow.cc
@@ -4,6 +4,7 @@
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 
 // REQUIRES: compiler-rt-optimized
+// XFAIL: arm-linux-gnueabi
 
 #include <string.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/throw_call_test.cc b/test/asan/TestCases/throw_call_test.cc
index dff112f..20e9a5e 100644
--- a/test/asan/TestCases/throw_call_test.cc
+++ b/test/asan/TestCases/throw_call_test.cc
@@ -5,6 +5,9 @@
 // Android builds with static libstdc++ by default.
 // XFAIL: android
 
+// Clang doesn't support exceptions on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 static volatile int zero = 0;
 inline void pretend_to_do_something(void *x) {
diff --git a/test/asan/TestCases/throw_catch.cc b/test/asan/TestCases/throw_catch.cc
index bb41946..f35378d 100644
--- a/test/asan/TestCases/throw_catch.cc
+++ b/test/asan/TestCases/throw_catch.cc
@@ -1,5 +1,8 @@
 // RUN: %clangxx_asan -O %s -o %t && %run %t
 
+// Clang doesn't support exceptions on Windows yet.
+// XFAIL: win32
+
 #include <assert.h>
 #include <setjmp.h>
 #include <stdlib.h>
diff --git a/test/asan/TestCases/throw_invoke_test.cc b/test/asan/TestCases/throw_invoke_test.cc
index 2fc557d..ec48fc7 100644
--- a/test/asan/TestCases/throw_invoke_test.cc
+++ b/test/asan/TestCases/throw_invoke_test.cc
@@ -1,5 +1,9 @@
 // RUN: %clangxx_asan %s -o %t && %run %t
 // RUN: %clangxx_asan %s -o %t -static-libstdc++ && %run %t
+
+// Clang doesn't support exceptions on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 static volatile int zero = 0;
 inline void pretend_to_do_something(void *x) {
diff --git a/test/asan/TestCases/time_interceptor.cc b/test/asan/TestCases/time_interceptor.cc
index 4fbd433..89b2183 100644
--- a/test/asan/TestCases/time_interceptor.cc
+++ b/test/asan/TestCases/time_interceptor.cc
@@ -2,6 +2,9 @@
 
 // Test the time() interceptor.
 
+// There's no interceptor for time() on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -12,5 +15,8 @@
   time_t t = time(tm);
   printf("Time: %s\n", ctime(&t));  // NOLINT
   // CHECK: use-after-free
+  // Regression check for
+  // https://code.google.com/p/address-sanitizer/issues/detail?id=321
+  // CHECK: SUMMARY
   return 0;
 }
diff --git a/test/asan/TestCases/uar_and_exceptions.cc b/test/asan/TestCases/uar_and_exceptions.cc
index 2e0c864..0bfe297 100644
--- a/test/asan/TestCases/uar_and_exceptions.cc
+++ b/test/asan/TestCases/uar_and_exceptions.cc
@@ -2,6 +2,9 @@
 // export ASAN_OPTIONS=detect_stack_use_after_return=1
 // RUN: %clangxx_asan  -O0 %s -o %t && %run %t
 
+// Clang doesn't support exceptions on Windows yet.
+// XFAIL: win32
+
 #include <stdio.h>
 
 volatile char *g;
diff --git a/test/asan/TestCases/use-after-delete.cc b/test/asan/TestCases/use-after-delete.cc
index 7696e18..f22e9e5 100644
--- a/test/asan/TestCases/use-after-delete.cc
+++ b/test/asan/TestCases/use-after-delete.cc
@@ -2,6 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
+// XFAIL: arm-linux-gnueabi
 
 #include <stdlib.h>
 int main() {
diff --git a/test/asan/TestCases/use-after-free-right.cc b/test/asan/TestCases/use-after-free-right.cc
index 16c4348..68ac158 100644
--- a/test/asan/TestCases/use-after-free-right.cc
+++ b/test/asan/TestCases/use-after-free-right.cc
@@ -2,6 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
+// XFAIL: arm-linux-gnueabi
 
 // Test use-after-free report in the case when access is at the right border of
 // the allocation.
diff --git a/test/asan/TestCases/use-after-free.cc b/test/asan/TestCases/use-after-free.cc
index 7f77aac..0cd87ee 100644
--- a/test/asan/TestCases/use-after-free.cc
+++ b/test/asan/TestCases/use-after-free.cc
@@ -2,6 +2,7 @@
 // RUN: %clangxx_asan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O2 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
 // RUN: %clangxx_asan -O3 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-%os --check-prefix=CHECK
+// XFAIL: arm-linux-gnueabi
 
 #include <stdlib.h>
 int main() {
diff --git a/test/asan/TestCases/use-after-scope-dtor-order.cc b/test/asan/TestCases/use-after-scope-dtor-order.cc
index b045888..7896dd3 100644
--- a/test/asan/TestCases/use-after-scope-dtor-order.cc
+++ b/test/asan/TestCases/use-after-scope-dtor-order.cc
@@ -1,5 +1,6 @@
 // RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && \
 // RUN:     not %run %t 2>&1 | FileCheck %s
+// XFAIL: *
 #include <stdio.h>
 
 struct IntHolder {
diff --git a/test/asan/TestCases/use-after-scope-inlined.cc b/test/asan/TestCases/use-after-scope-inlined.cc
index 7e57a63..a0a0d94 100644
--- a/test/asan/TestCases/use-after-scope-inlined.cc
+++ b/test/asan/TestCases/use-after-scope-inlined.cc
@@ -3,6 +3,7 @@
 // llvm.lifetime intrinsics at -O0.
 //
 // RUN: %clangxx_asan -O2 -fsanitize=use-after-scope %s -o %t && not %run %t 2>&1 | FileCheck %s
+// XFAIL: *
 
 int *arr;
 
diff --git a/test/asan/TestCases/use-after-scope-nobug.cc b/test/asan/TestCases/use-after-scope-nobug.cc
index 0a1bbfe..21b085c 100644
--- a/test/asan/TestCases/use-after-scope-nobug.cc
+++ b/test/asan/TestCases/use-after-scope-nobug.cc
@@ -1,4 +1,5 @@
 // RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && %run %t
+// XFAIL: *
 
 #include <stdio.h>
 
diff --git a/test/asan/TestCases/use-after-scope.cc b/test/asan/TestCases/use-after-scope.cc
index 49f756c..f98a8e6 100644
--- a/test/asan/TestCases/use-after-scope.cc
+++ b/test/asan/TestCases/use-after-scope.cc
@@ -1,6 +1,7 @@
 // RUN: %clangxx_asan -O0 -fsanitize=use-after-scope %s -o %t && \
 // RUN: not %run %t 2>&1 | FileCheck %s
 // RUN: env ASAN_OPTIONS="detect_stack_use_after_return=1" not %run %t 2>&1 | FileCheck %s
+// XFAIL: *
 
 int main() {
   int *p = 0;
diff --git a/test/asan/android_commands/android_run.py b/test/asan/android_commands/android_run.py
index a6ceeb4..7f8c612 100755
--- a/test/asan/android_commands/android_run.py
+++ b/test/asan/android_commands/android_run.py
@@ -23,7 +23,7 @@
 device_stdout = device_binary + '.stdout'
 device_stderr = device_binary + '.stderr'
 device_exitcode = device_binary + '.exitcode'
-ret = adb(['shell', 'cd %s && %s %s %s >%s 2>%s ; echo $? >%s' %
+ret = adb(['shell', 'cd %s && %s asanwrapper %s %s >%s 2>%s ; echo $? >%s' %
            (ANDROID_TMPDIR, device_env, device_binary, device_args,
             device_stdout, device_stderr, device_exitcode)])
 if ret != 0:
diff --git a/test/asan/lit.cfg b/test/asan/lit.cfg
index ef51266..db2459f 100644
--- a/test/asan/lit.cfg
+++ b/test/asan/lit.cfg
@@ -1,6 +1,7 @@
 # -*- Python -*-
 
 import os
+import platform
 
 def get_required_attr(config, attr_name):
   attr_value = getattr(config, attr_name, None)
@@ -25,26 +26,18 @@
 # GCC-ASan doesn't link in all the necessary libraries automatically, so
 # we have to do it ourselves.
 if config.compiler_id == 'GNU':
-  extra_linkflags = ["-lpthread", "-lstdc++", "-ldl"]
+  extra_linkflags = ["-pthread", "-lstdc++", "-ldl"]
 else:
   extra_linkflags = []
 # Setup default compiler flags used with -fsanitize=address option.
 # FIXME: Review the set of required flags and check if it can be reduced.
 target_cflags = [get_required_attr(config, "target_cflags")] + extra_linkflags
 target_cxxflags = config.cxx_mode_flags + target_cflags
-clang_asan_static_cflags = ["-fsanitize=address"] + target_cflags
-
-clang_path = getattr(config, 'clang', None)
-if clang_path.find("clang-cl") == -1:
-  clang_asan_static_cflags += ["-g",
-                               "-mno-omit-leaf-frame-pointer",
-                               "-fno-omit-frame-pointer",
-                               "-fno-optimize-sibling-calls"]
-else:
-  clang_asan_static_cflags += ["-Zi",
-                               "-Wno-deprecated-declarations",
-                               "-D_HAS_EXCEPTIONS=0"]
-
+clang_asan_static_cflags = ["-fsanitize=address",
+                            "-mno-omit-leaf-frame-pointer",
+                            "-fno-omit-frame-pointer",
+                            "-fno-optimize-sibling-calls",
+                            "-g"] + target_cflags
 clang_asan_static_cxxflags = config.cxx_mode_flags + clang_asan_static_cflags
 
 if config.asan_dynamic:
@@ -76,6 +69,17 @@
   config.substitutions.append( ("%clang_asan_static ", build_invocation(clang_asan_static_cflags)) )
   config.substitutions.append( ("%clangxx_asan_static ", build_invocation(clang_asan_static_cxxflags)) )
 
+# Windows-specific tests might also use the clang-cl.exe driver.
+if platform.system() == 'Windows':
+  clang_cl_asan_cxxflags = ["-fsanitize=address",
+                            "-Wno-deprecated-declarations",
+                            "-WX",
+                            "-D_HAS_EXCEPTIONS=0",
+                            "-Zi"] + target_cflags
+  clang_invocation = build_invocation(clang_cl_asan_cxxflags)
+  clang_cl_invocation = clang_invocation.replace("clang.exe","clang-cl.exe")
+  config.substitutions.append( ("%clang_cl_asan ", clang_cl_invocation) )
+
 # FIXME: De-hardcode this path.
 asan_source_dir = os.path.join(
   get_required_attr(config, "compiler_rt_src_root"), "lib", "asan")
@@ -106,6 +110,11 @@
 
 config.available_features.add("asan-" + config.bits + "-bits")
 
+# Allow tests to use "REQUIRES: stable-runtime".  For use when you cannot use
+# XFAIL because the test hangs.
+if config.target_arch != 'arm':
+  config.available_features.add('stable-runtime')
+
 # Turn on leak detection on 64-bit Linux.
 if config.host_os == 'Linux' and config.bits == '64':
   config.environment['ASAN_OPTIONS'] = 'detect_leaks=1'
diff --git a/test/builtins/Unit/divtf3_test.c b/test/builtins/Unit/divtf3_test.c
new file mode 100644
index 0000000..dad631c
--- /dev/null
+++ b/test/builtins/Unit/divtf3_test.c
@@ -0,0 +1,94 @@
+//===--------------- divtf3_test.c - Test __divtf3 ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __divtf3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113
+
+#include "fp_test.h"
+
+// Returns: a / b
+long double __divtf3(long double a, long double b);
+
+int test__divtf3(long double a, long double b,
+                 uint64_t expectedHi, uint64_t expectedLo)
+{
+    long double x = __divtf3(a, b);
+    int ret = compareResultLD(x, expectedHi, expectedLo);
+
+    if (ret){
+        printf("error in test__divtf3(%.20Lf, %.20Lf) = %.20Lf, "
+               "expected %.20Lf\n", a, b, x,
+               fromRep128(expectedHi, expectedLo));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LDBL_MANT_DIG__ == 113
+    // qNaN / any = qNaN
+    if (test__divtf3(makeQNaN128(),
+                     0x1.23456789abcdefp+5L,
+                     UINT64_C(0x7fff800000000000),
+                     UINT64_C(0x0)))
+        return 1;
+    // NaN / any = NaN
+    if (test__divtf3(makeNaN128(UINT64_C(0x800030000000)),
+                     0x1.23456789abcdefp+5L,
+                     UINT64_C(0x7fff800000000000),
+                     UINT64_C(0x0)))
+        return 1;
+    // inf / any = inf
+    if (test__divtf3(makeInf128(),
+                     0x1.23456789abcdefp+5L,
+                     UINT64_C(0x7fff000000000000),
+                     UINT64_C(0x0)))
+        return 1;
+    // any / any
+    if (test__divtf3(0x1.a23b45362464523375893ab4cdefp+5L,
+                     0x1.eedcbaba3a94546558237654321fp-1L,
+                     UINT64_C(0x4004b0b72924d407),
+                     UINT64_C(0x0717e84356c6eba2)))
+        return 1;
+    if (test__divtf3(0x1.a2b34c56d745382f9abf2c3dfeffp-50L,
+                     0x1.ed2c3ba15935332532287654321fp-9L,
+                     UINT64_C(0x3fd5b2af3f828c9b),
+                     UINT64_C(0x40e51f64cde8b1f2)))
+        return 1;
+    if (test__divtf3(0x1.2345f6aaaa786555f42432abcdefp+456L,
+                     0x1.edacbba9874f765463544dd3621fp+6400L,
+                     UINT64_C(0x28c62e15dc464466),
+                     UINT64_C(0xb5a07586348557ac)))
+        return 1;
+    if (test__divtf3(0x1.2d3456f789ba6322bc665544edefp-234L,
+                     0x1.eddcdba39f3c8b7a36564354321fp-4455L,
+                     UINT64_C(0x507b38442b539266),
+                     UINT64_C(0x22ce0f1d024e1252)))
+        return 1;
+    if (test__divtf3(0x1.2345f6b77b7a8953365433abcdefp+234L,
+                     0x1.edcba987d6bb3aa467754354321fp-4055L,
+                     UINT64_C(0x50bf2e02f0798d36),
+                     UINT64_C(0x5e6fcb6b60044078)))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
diff --git a/test/builtins/Unit/extenddftf2_test.c b/test/builtins/Unit/extenddftf2_test.c
new file mode 100644
index 0000000..05acc08
--- /dev/null
+++ b/test/builtins/Unit/extenddftf2_test.c
@@ -0,0 +1,82 @@
+//===--------------- extenddftf2_test.c - Test __extenddftf2 --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __extenddftf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113
+
+#include "fp_test.h"
+
+long double __extenddftf2(double a);
+
+int test__extenddftf2(double a, uint64_t expectedHi, uint64_t expectedLo)
+{
+    long double x = __extenddftf2(a);
+    int ret = compareResultLD(x, expectedHi, expectedLo);
+
+    if (ret){
+        printf("error in test__extenddftf2(%f) = %.20Lf, "
+               "expected %.20Lf\n", a, x, fromRep128(expectedHi, expectedLo));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LDBL_MANT_DIG__ == 113
+    // qNaN
+    if (test__extenddftf2(makeQNaN64(),
+                          UINT64_C(0x7fff800000000000),
+                          UINT64_C(0x0)))
+        return 1;
+    // NaN
+    if (test__extenddftf2(makeNaN64(UINT64_C(0x7100000000000)),
+                          UINT64_C(0x7fff710000000000),
+                          UINT64_C(0x0)))
+        return 1;
+    // inf
+    if (test__extenddftf2(makeInf64(),
+                          UINT64_C(0x7fff000000000000),
+                          UINT64_C(0x0)))
+        return 1;
+    // zero
+    if (test__extenddftf2(0.0, UINT64_C(0x0), UINT64_C(0x0)))
+        return 1;
+
+    if (test__extenddftf2(0x1.23456789abcdefp+5,
+                          UINT64_C(0x400423456789abcd),
+                          UINT64_C(0xf000000000000000)))
+        return 1;
+    if (test__extenddftf2(0x1.edcba987654321fp-9,
+                          UINT64_C(0x3ff6edcba9876543),
+                          UINT64_C(0x2000000000000000)))
+        return 1;
+    if (test__extenddftf2(0x1.23456789abcdefp+45,
+                          UINT64_C(0x402c23456789abcd),
+                          UINT64_C(0xf000000000000000)))
+        return 1;
+    if (test__extenddftf2(0x1.edcba987654321fp-45,
+                          UINT64_C(0x3fd2edcba9876543),
+                          UINT64_C(0x2000000000000000)))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
diff --git a/test/builtins/Unit/extendsftf2_test.c b/test/builtins/Unit/extendsftf2_test.c
new file mode 100644
index 0000000..5f41928
--- /dev/null
+++ b/test/builtins/Unit/extendsftf2_test.c
@@ -0,0 +1,83 @@
+//===--------------- extendsftf2_test.c - Test __extendsftf2 --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __extendsftf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113
+
+#include "fp_test.h"
+
+long double __extendsftf2(float a);
+
+int test__extendsftf2(float a, uint64_t expectedHi, uint64_t expectedLo)
+{
+    long double x = __extendsftf2(a);
+    int ret = compareResultLD(x, expectedHi, expectedLo);
+
+    if (ret)
+    {
+        printf("error in test__extendsftf2(%f) = %.20Lf, "
+               "expected %.20Lf\n", a, x, fromRep128(expectedHi, expectedLo));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LDBL_MANT_DIG__ == 113
+    // qNaN
+    if (test__extendsftf2(makeQNaN32(),
+                          UINT64_C(0x7fff800000000000),
+                          UINT64_C(0x0)))
+        return 1;
+    // NaN
+    if (test__extendsftf2(makeNaN32(UINT32_C(0x410000)),
+                          UINT64_C(0x7fff820000000000),
+                          UINT64_C(0x0)))
+        return 1;
+    // inf
+    if (test__extendsftf2(makeInf32(),
+                          UINT64_C(0x7fff000000000000),
+                          UINT64_C(0x0)))
+        return 1;
+    // zero
+    if (test__extendsftf2(0.0f, UINT64_C(0x0), UINT64_C(0x0)))
+        return 1;
+
+    if (test__extendsftf2(0x1.23456p+5f,
+                          UINT64_C(0x4004234560000000),
+                          UINT64_C(0x0)))
+        return 1;
+    if (test__extendsftf2(0x1.edcbap-9f,
+                          UINT64_C(0x3ff6edcba0000000),
+                          UINT64_C(0x0)))
+        return 1;
+    if (test__extendsftf2(0x1.23456p+45f,
+                          UINT64_C(0x402c234560000000),
+                          UINT64_C(0x0)))
+        return 1;
+    if (test__extendsftf2(0x1.edcbap-45f,
+                          UINT64_C(0x3fd2edcba0000000),
+                          UINT64_C(0x0)))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
diff --git a/test/builtins/Unit/trunctfdf2_test.c b/test/builtins/Unit/trunctfdf2_test.c
new file mode 100644
index 0000000..46855e3
--- /dev/null
+++ b/test/builtins/Unit/trunctfdf2_test.c
@@ -0,0 +1,76 @@
+//===-------------- trunctfdf2_test.c - Test __trunctfdf2 -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __trunctfdf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113
+
+#include "fp_test.h"
+
+double __trunctfdf2(long double a);
+
+int test__trunctfdf2(long double a, uint64_t expected)
+{
+    double x = __trunctfdf2(a);
+    int ret = compareResultD(x, expected);
+
+    if (ret)
+    {
+        printf("error in test__trunctfdf2(%.20Lf) = %lf, "
+               "expected %lf\n", a, x, fromRep64(expected));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LDBL_MANT_DIG__ == 113
+    // qNaN
+    if (test__trunctfdf2(makeQNaN128(),
+                         UINT64_C(0x7ff8000000000000)))
+        return 1;
+    // NaN
+    if (test__trunctfdf2(makeNaN128(UINT64_C(0x810000000000)),
+                         UINT64_C(0x7ff8100000000000)))
+        return 1;
+    // inf
+    if (test__trunctfdf2(makeInf128(),
+                         UINT64_C(0x7ff0000000000000)))
+        return 1;
+    // zero
+    if (test__trunctfdf2(0.0L, UINT64_C(0x0)))
+        return 1;
+
+    if (test__trunctfdf2(0x1.af23456789bbaaab347645365cdep+5L,
+                         UINT64_C(0x404af23456789bbb)))
+        return 1;
+    if (test__trunctfdf2(0x1.dedafcff354b6ae9758763545432p-9L,
+                         UINT64_C(0x3f6dedafcff354b7)))
+        return 1;
+    if (test__trunctfdf2(0x1.2f34dd5f437e849b4baab754cdefp+4534L,
+                         UINT64_C(0x7ff0000000000000)))
+        return 1;
+    if (test__trunctfdf2(0x1.edcbff8ad76ab5bf46463233214fp-435L,
+                         UINT64_C(0x24cedcbff8ad76ab)))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
diff --git a/test/builtins/Unit/trunctfsf2_test.c b/test/builtins/Unit/trunctfsf2_test.c
new file mode 100644
index 0000000..44e7fd1
--- /dev/null
+++ b/test/builtins/Unit/trunctfsf2_test.c
@@ -0,0 +1,75 @@
+//===--------------- trunctfsf2_test.c - Test __trunctfsf2 ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file tests __trunctfsf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 113
+
+#include "fp_test.h"
+
+float __trunctfsf2(long double a);
+
+int test__trunctfsf2(long double a, uint32_t expected)
+{
+    float x = __trunctfsf2(a);
+    int ret = compareResultF(x, expected);
+
+    if (ret){
+        printf("error in test__trunctfsf2(%.20Lf) = %f, "
+               "expected %f\n", a, x, fromRep32(expected));
+    }
+    return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main()
+{
+#if __LDBL_MANT_DIG__ == 113
+    // qNaN
+    if (test__trunctfsf2(makeQNaN128(),
+                         UINT32_C(0x7fc00000)))
+        return 1;
+    // NaN
+    if (test__trunctfsf2(makeNaN128(UINT64_C(0x810000000000)),
+                         UINT32_C(0x7fc08000)))
+        return 1;
+    // inf
+    if (test__trunctfsf2(makeInf128(),
+                         UINT32_C(0x7f800000)))
+        return 1;
+    // zero
+    if (test__trunctfsf2(0.0L, UINT32_C(0x0)))
+        return 1;
+
+    if (test__trunctfsf2(0x1.23a2abb4a2ddee355f36789abcdep+5L,
+                         UINT32_C(0x4211d156)))
+        return 1;
+    if (test__trunctfsf2(0x1.e3d3c45bd3abfd98b76a54cc321fp-9L,
+                         UINT32_C(0x3b71e9e2)))
+        return 1;
+    if (test__trunctfsf2(0x1.234eebb5faa678f4488693abcdefp+4534L,
+                         UINT32_C(0x7f800000)))
+        return 1;
+    if (test__trunctfsf2(0x1.edcba9bb8c76a5a43dd21f334634p-435L,
+                         UINT32_C(0x0)))
+        return 1;
+
+#else
+    printf("skipped\n");
+
+#endif
+    return 0;
+}
diff --git a/test/dfsan/basic.c b/test/dfsan/basic.c
index 3c87bdc..6582727 100644
--- a/test/dfsan/basic.c
+++ b/test/dfsan/basic.c
@@ -17,5 +17,12 @@
   dfsan_label read_label = dfsan_read_label(&i, sizeof(i));
   assert(i_label == read_label);
 
+  dfsan_label j_label = dfsan_create_label("j", 0);
+  dfsan_add_label(j_label, &i, sizeof(i));
+
+  read_label = dfsan_read_label(&i, sizeof(i));
+  assert(dfsan_has_label(read_label, i_label));
+  assert(dfsan_has_label(read_label, j_label));
+
   return 0;
 }
diff --git a/test/lit.common.cfg b/test/lit.common.cfg
index 5366073..adf65ee 100644
--- a/test/lit.common.cfg
+++ b/test/lit.common.cfg
@@ -7,6 +7,7 @@
 import platform
 
 import lit.formats
+import lit.util
 
 # Setup test format
 execute_external = (platform.system() != 'Windows'
@@ -77,3 +78,5 @@
 compiler_rt_debug = getattr(config, 'compiler_rt_debug', False)
 if not compiler_rt_debug:
   config.available_features.add('compiler-rt-optimized')
+
+lit.util.usePlatformSdkOnDarwin(config, lit_config)
diff --git a/test/msan/SharedLibs/dso-origin-so.cc b/test/msan/SharedLibs/dso-origin-so.cc
deleted file mode 100644
index 8930a71..0000000
--- a/test/msan/SharedLibs/dso-origin-so.cc
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <stdlib.h>
-
-#include "dso-origin.h"
-
-void my_access(int *p) {
-  volatile int tmp;
-  // Force initialize-ness check.
-  if (*p)
-    tmp = 1;
-}
-
-void *my_alloc(unsigned sz) {
-  return malloc(sz);
-}
diff --git a/test/msan/SharedLibs/dso-origin.h b/test/msan/SharedLibs/dso-origin.h
deleted file mode 100644
index ff926b3..0000000
--- a/test/msan/SharedLibs/dso-origin.h
+++ /dev/null
@@ -1,4 +0,0 @@
-extern "C" {
-void my_access(int *p);
-void *my_alloc(unsigned sz);
-}
diff --git a/test/msan/SharedLibs/lit.local.cfg b/test/msan/SharedLibs/lit.local.cfg
deleted file mode 100644
index b3677c1..0000000
--- a/test/msan/SharedLibs/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-# Sources in this directory are compiled as shared libraries and used by
-# tests in parent directory.
-
-config.suffixes = []
diff --git a/test/msan/chained_origin.cc b/test/msan/chained_origin.cc
index f69de9a..336bbd8 100644
--- a/test/msan/chained_origin.cc
+++ b/test/msan/chained_origin.cc
@@ -6,6 +6,16 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-HEAP < %t.out
 
+
+// RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t && \
+// RUN:     not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-STACK < %t.out
+
+// RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -DHEAP=1 -m64 -O3 %s -o %t && \
+// RUN:     not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-HEAP < %t.out
+
+
 #include <stdio.h>
 
 volatile int x, y;
@@ -38,19 +48,19 @@
 }
 
 // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-// CHECK: {{#0 .* in main.*chained_origin.cc:37}}
+// CHECK: {{#0 .* in main.*chained_origin.cc:47}}
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_h.*chained_origin.cc:25}}
-// CHECK: {{#1 .* in main.*chained_origin.cc:36}}
+// CHECK: {{#0 .* in fn_h.*chained_origin.cc:35}}
+// CHECK: {{#1 .* in main.*chained_origin.cc:46}}
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_g.*chained_origin.cc:15}}
-// CHECK: {{#1 .* in fn_f.*chained_origin.cc:20}}
-// CHECK: {{#2 .* in main.*chained_origin.cc:35}}
+// CHECK: {{#0 .* in fn_g.*chained_origin.cc:25}}
+// CHECK: {{#1 .* in fn_f.*chained_origin.cc:30}}
+// CHECK: {{#2 .* in main.*chained_origin.cc:45}}
 
 // CHECK-STACK: Uninitialized value was created by an allocation of 'z' in the stack frame of function 'main'
-// CHECK-STACK: {{#0 .* in main.*chained_origin.cc:28}}
+// CHECK-STACK: {{#0 .* in main.*chained_origin.cc:38}}
 
 // CHECK-HEAP: Uninitialized value was created by a heap allocation
-// CHECK-HEAP: {{#1 .* in main.*chained_origin.cc:30}}
+// CHECK-HEAP: {{#1 .* in main.*chained_origin.cc:40}}
diff --git a/test/msan/chained_origin_empty_stack.cc b/test/msan/chained_origin_empty_stack.cc
new file mode 100644
index 0000000..36727e3
--- /dev/null
+++ b/test/msan/chained_origin_empty_stack.cc
@@ -0,0 +1,34 @@
+// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t && \
+// RUN:     MSAN_OPTIONS=store_context_size=1 not %run %t 2>&1 | FileCheck %s
+
+// Test that stack trace for the intermediate store is not empty.
+
+// CHECK: MemorySanitizer: use-of-uninitialized-value
+// CHECK:   #0 {{.*}} in main
+
+// CHECK: Uninitialized value was stored to memory at
+// CHECK:   #0 {{.*}} in fn_g
+// CHECK-NOT: #1
+
+// CHECK: Uninitialized value was created by an allocation of 'z' in the stack frame of function 'main'
+// CHECK:   #0 {{.*}} in main
+
+#include <stdio.h>
+
+volatile int x;
+
+__attribute__((noinline))
+void fn_g(int a) {
+  x = a;
+}
+
+__attribute__((noinline))
+void fn_f(int a) {
+  fn_g(a);
+}
+
+int main(int argc, char *argv[]) {
+  int volatile z;
+  fn_f(z);
+  return x;
+}
diff --git a/test/msan/chained_origin_limits.cc b/test/msan/chained_origin_limits.cc
index c6f8b62..a8621f3 100644
--- a/test/msan/chained_origin_limits.cc
+++ b/test/msan/chained_origin_limits.cc
@@ -1,5 +1,6 @@
 // This test program creates a very large number of unique histories.
 
+// Heap origin.
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t
 
 // RUN: MSAN_OPTIONS=origin_history_size=7 not %run %t >%t.out 2>&1
@@ -11,6 +12,44 @@
 // RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
 
+// Stack origin.
+// RUN: %clangxx_msan -DSTACK -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t
+
+// RUN: MSAN_OPTIONS=origin_history_size=7 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
+
+// RUN: MSAN_OPTIONS=origin_history_size=2 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
+
+// RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+
+
+// Heap origin, with calls.
+// RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t
+
+// RUN: MSAN_OPTIONS=origin_history_size=7 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
+
+// RUN: MSAN_OPTIONS=origin_history_size=2 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
+
+// RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+
+
+// Stack origin, with calls.
+// RUN: %clangxx_msan -DSTACK -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t
+
+// RUN: MSAN_OPTIONS=origin_history_size=7 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK7 < %t.out
+
+// RUN: MSAN_OPTIONS=origin_history_size=2 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK2 < %t.out
+
+// RUN: MSAN_OPTIONS=origin_history_per_stack_limit=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK-PER-STACK < %t.out
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -19,6 +58,11 @@
 static char *buf, *cur, *end;
 void init() {
   buf = new char[1000];
+#ifdef STACK
+  char stackbuf[1000];
+  char *volatile p = stackbuf;
+  memcpy(buf, p, 1000);
+#endif
   cur = buf;
   end = buf + 1000;
 }
@@ -83,13 +127,13 @@
 // CHECK7-NOT: Uninitialized value was stored to memory at
 // CHECK7: Uninitialized value was stored to memory at
 // CHECK7-NOT: Uninitialized value was stored to memory at
-// CHECK7: Uninitialized value was created by a heap allocation
+// CHECK7: Uninitialized value was created
 
 // CHECK2: WARNING: MemorySanitizer: use-of-uninitialized-value
 // CHECK2-NOT: Uninitialized value was stored to memory at
 // CHECK2: Uninitialized value was stored to memory at
 // CHECK2-NOT: Uninitialized value was stored to memory at
-// CHECK2: Uninitialized value was created by a heap allocation
+// CHECK2: Uninitialized value was created
 
 // CHECK-PER-STACK: WARNING: MemorySanitizer: use-of-uninitialized-value
 // CHECK-PER-STACK: Uninitialized value was stored to memory at
@@ -98,4 +142,4 @@
 // CHECK-PER-STACK: in fn2
 // CHECK-PER-STACK: Uninitialized value was stored to memory at
 // CHECK-PER-STACK: in fn1
-// CHECK-PER-STACK: Uninitialized value was created by a heap allocation
+// CHECK-PER-STACK: Uninitialized value was created
diff --git a/test/msan/chained_origin_memcpy.cc b/test/msan/chained_origin_memcpy.cc
index e56db9c..f4c2f7f 100644
--- a/test/msan/chained_origin_memcpy.cc
+++ b/test/msan/chained_origin_memcpy.cc
@@ -6,6 +6,16 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 < %t.out
 
+
+// RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -m64 -DOFFSET=0 -O3 %s -o %t && \
+// RUN:     not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z1 < %t.out
+
+// RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -DOFFSET=10 -m64 -O3 %s -o %t && \
+// RUN:     not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-Z2 < %t.out
+
+
 #include <stdio.h>
 #include <string.h>
 
@@ -37,15 +47,15 @@
 }
 
 // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-// CHECK: {{#0 .* in main .*chained_origin_memcpy.cc:36}}
+// CHECK: {{#0 .* in main .*chained_origin_memcpy.cc:46}}
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:28}}
+// CHECK: {{#1 .* in fn_h.*chained_origin_memcpy.cc:38}}
 
 // CHECK: Uninitialized value was stored to memory at
-// CHECK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:18}}
-// CHECK: {{#1 .* in fn_f.*chained_origin_memcpy.cc:23}}
+// CHECK: {{#0 .* in fn_g.*chained_origin_memcpy.cc:28}}
+// CHECK: {{#1 .* in fn_f.*chained_origin_memcpy.cc:33}}
 
 // CHECK-Z1: Uninitialized value was created by an allocation of 'z1' in the stack frame of function 'main'
 // CHECK-Z2: Uninitialized value was created by an allocation of 'z2' in the stack frame of function 'main'
-// CHECK: {{#0 .* in main.*chained_origin_memcpy.cc:31}}
+// CHECK: {{#0 .* in main.*chained_origin_memcpy.cc:41}}
diff --git a/test/msan/chained_origin_with_signals.cc b/test/msan/chained_origin_with_signals.cc
index 5fd497e..ef98385 100644
--- a/test/msan/chained_origin_with_signals.cc
+++ b/test/msan/chained_origin_with_signals.cc
@@ -6,6 +6,10 @@
 // RUN:     not %run %t >%t.out 2>&1
 // RUN: FileCheck %s < %t.out
 
+// RUN: %clangxx_msan -mllvm -msan-instrumentation-with-call-threshold=0 -fsanitize-memory-track-origins=2 -m64 -O3 %s -o %t && \
+// RUN:     not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
 #include <signal.h>
 #include <stdio.h>
 #include <sys/types.h>
diff --git a/test/msan/dso-origin.cc b/test/msan/dso-origin.cc
index 9bde029..ba008c0 100644
--- a/test/msan/dso-origin.cc
+++ b/test/msan/dso-origin.cc
@@ -1,12 +1,33 @@
 // Build a library with origin tracking and an executable w/o origin tracking.
 // Test that origin tracking is enabled at runtime.
-// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O0 %p/SharedLibs/dso-origin-so.cc \
-// RUN:     -fPIC -shared -o %t-so.so
+// RUN: %clangxx_msan -fsanitize-memory-track-origins -m64 -O0 %s -DBUILD_SO -fPIC -shared -o %t-so.so
 // RUN: %clangxx_msan -m64 -O0 %s %t-so.so -o %t && not %run %t 2>&1 | FileCheck %s
 
+#ifdef BUILD_SO
+
 #include <stdlib.h>
 
-#include "SharedLibs/dso-origin.h"
+extern "C" {
+void my_access(int *p) {
+  volatile int tmp;
+  // Force initialize-ness check.
+  if (*p)
+    tmp = 1;
+}
+
+void *my_alloc(unsigned sz) {
+  return malloc(sz);
+}
+}  // extern "C"
+
+#else  // BUILD_SO
+
+#include <stdlib.h>
+
+extern "C" {
+void my_access(int *p);
+void *my_alloc(unsigned sz);
+}
 
 int main(int argc, char **argv) {
   int *x = (int *)my_alloc(sizeof(int));
@@ -14,12 +35,14 @@
   delete x;
 
   // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-  // CHECK: {{#0 0x.* in my_access .*dso-origin-so.cc:}}
+  // CHECK: {{#0 0x.* in my_access .*dso-origin.cc:}}
   // CHECK: {{#1 0x.* in main .*dso-origin.cc:}}[[@LINE-5]]
   // CHECK: Uninitialized value was created by a heap allocation
   // CHECK: {{#0 0x.* in .*malloc}}
-  // CHECK: {{#1 0x.* in my_alloc .*dso-origin-so.cc:}}
+  // CHECK: {{#1 0x.* in my_alloc .*dso-origin.cc:}}
   // CHECK: {{#2 0x.* in main .*dso-origin.cc:}}[[@LINE-10]]
-  // CHECK: SUMMARY: MemorySanitizer: use-of-uninitialized-value {{.*dso-origin-so.cc:.* my_access}}
+  // CHECK: SUMMARY: MemorySanitizer: use-of-uninitialized-value {{.*dso-origin.cc:.* my_access}}
   return 0;
 }
+
+#endif  // BUILD_SO
diff --git a/test/msan/dtls_test.c b/test/msan/dtls_test.c
index 5086389..cb88ede 100644
--- a/test/msan/dtls_test.c
+++ b/test/msan/dtls_test.c
@@ -1,10 +1,10 @@
-/* RUN: %clang_msan -m64 %s -o %t
-   RUN: %clang_msan -m64 %s -DBUILD_SO -fPIC -o %t-so.so -shared
-   RUN: not %run %t 2>&1 | FileCheck %s
-   CHECK: MemorySanitizer: use-of-uninitialized-value
+/* RUN: %clang_msan -g -m64 %s -o %t
+   RUN: %clang_msan -g -m64 %s -DBUILD_SO -fPIC -o %t-so.so -shared
+   RUN: %run %t 2>&1
 
-   This is an actual bug in msan/glibc integration,
+   Regression test for a bug in msan/glibc integration,
    see https://sourceware.org/bugzilla/show_bug.cgi?id=16291
+   and https://code.google.com/p/memory-sanitizer/issues/detail?id=44
 */
 
 #ifndef BUILD_SO
diff --git a/test/msan/getline.cc b/test/msan/getline.cc
index 086d0b9..51e105e 100644
--- a/test/msan/getline.cc
+++ b/test/msan/getline.cc
@@ -1,4 +1,11 @@
-// RUN: %clangxx_msan -O0 %s -o %t && %run %t %p
+// RUN: echo "abcde" > %t-testdata
+// RUN: echo "12345" >> %t-testdata
+// RUN: %clangxx_msan -O0 %s -o %t && %run %t %t-testdata
+// RUN: %clangxx_msan -O2 %s -o %t && %run %t %t-testdata
+// RUN: %clang_msan -O0 -xc %s -o %t && %run %t %t-testdata
+// RUN: %clang_msan -O2 -xc %s -o %t && %run %t %t-testdata
+// RUN: %clang_msan -O0 -xc -D_GNU_SOURCE=1 %s -o %t && %run %t %t-testdata
+// RUN: %clang_msan -O2 -xc -D_GNU_SOURCE=1 %s -o %t && %run %t %t-testdata
 
 #include <assert.h>
 #include <stdio.h>
@@ -7,10 +14,9 @@
 
 int main(int argc, char **argv) {
   assert(argc == 2);
-  char buf[1024];
-  snprintf(buf, sizeof(buf), "%s/%s", argv[1], "getline_test_data");
+  printf("%s\n", argv[1]);
 
-  FILE *fp = fopen(buf, "r");
+  FILE *fp = fopen(argv[1], "r");
   assert(fp);
 
   char *line = 0;
@@ -22,7 +28,7 @@
   n = getline(&line, &len, fp);
   assert(n == 6);
   assert(strcmp(line, "12345\n") == 0);
-  
+
   free(line);
   fclose(fp);
 
diff --git a/test/msan/getline_test_data b/test/msan/getline_test_data
deleted file mode 100644
index 5ba1d4c..0000000
--- a/test/msan/getline_test_data
+++ /dev/null
@@ -1,2 +0,0 @@
-abcde
-12345
diff --git a/test/msan/malloc_hook.cc b/test/msan/malloc_hook.cc
deleted file mode 100644
index 5393080..0000000
--- a/test/msan/malloc_hook.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-// RUN: %clangxx_msan -O2 %s -o %t
-// RUN: %run %t 2>&1 | FileCheck %s
-#include <stdlib.h>
-#include <unistd.h>
-
-extern "C" {
-int __msan_get_ownership(const void *p);
-
-void *global_ptr;
-
-// Note: avoid calling functions that allocate memory in malloc/free
-// to avoid infinite recursion.
-void __msan_malloc_hook(void *ptr, size_t sz) {
-  if (__msan_get_ownership(ptr)) {
-    write(1, "MallocHook\n", sizeof("MallocHook\n"));
-    global_ptr = ptr;
-  }
-}
-void __msan_free_hook(void *ptr) {
-  if (__msan_get_ownership(ptr) && ptr == global_ptr)
-    write(1, "FreeHook\n", sizeof("FreeHook\n"));
-}
-}  // extern "C"
-
-int main() {
-  volatile int *x = new int;
-  // CHECK: MallocHook
-  // Check that malloc hook was called with correct argument.
-  if (global_ptr != (void*)x) {
-    _exit(1);
-  }
-  *x = 0;
-  delete x;
-  // CHECK: FreeHook
-  return 0;
-}
diff --git a/test/msan/msan_print_shadow3.cc b/test/msan/msan_print_shadow3.cc
new file mode 100644
index 0000000..c605ef1
--- /dev/null
+++ b/test/msan/msan_print_shadow3.cc
@@ -0,0 +1,16 @@
+// RUN: %clangxx_msan -m64 -O0 -g %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+#include <stdint.h>
+#include <sanitizer/msan_interface.h>
+
+int main(void) {
+  unsigned long long x = 0; // For 8-byte alignment.
+  uint32_t x_s = 0x12345678U;
+  __msan_partial_poison(&x, &x_s, sizeof(x_s));
+  __msan_print_shadow(&x, sizeof(x_s));
+  return 0;
+}
+
+// CHECK: Shadow map of [{{.*}}), 4 bytes:
+// CHECK: 0x{{.*}}: 87654321 ........ ........ ........
diff --git a/test/msan/mul_by_const.cc b/test/msan/mul_by_const.cc
new file mode 100644
index 0000000..a975bb9
--- /dev/null
+++ b/test/msan/mul_by_const.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx_msan -m64 -O2 %s -o %t && %run %t
+
+#include <sanitizer/msan_interface.h>
+
+struct S {
+  S(int a0) : a(a0) {}
+  int a;
+  int b;
+};
+
+// Here S is passed to FooRun as a 64-bit integer.
+// This triggers an optimization where 10000 * s.a is transformed into
+// ((*(uint64_t *)&s) * (10000 * 2**32)) >> 32
+// Test that MSan understands that this kills the uninitialized high half of S
+// (i.e. S::b).
+void FooRun(S s) {
+  int64_t x = 10000 * s.a;
+  __msan_check_mem_is_initialized(&x, sizeof(x));
+}
+
+int main(void) {
+  S z(1);
+  // Take &z to ensure that it is built on stack.
+  S *volatile p = &z;
+  FooRun(z);
+  return 0;
+}
diff --git a/test/msan/no_sanitize_memory_prop.cc b/test/msan/no_sanitize_memory_prop.cc
index b41e8a1..4275ebb 100644
--- a/test/msan/no_sanitize_memory_prop.cc
+++ b/test/msan/no_sanitize_memory_prop.cc
@@ -1,16 +1,9 @@
 // RUN: %clangxx_msan -m64 -O0 %s -o %t && %run %t >%t.out 2>&1
-// RUN: %clangxx_msan -m64 -O1 %s -o %t && not %run %t >%t.out 2>&1
-// RUN: FileCheck %s < %t.out
-// RUN: %clangxx_msan -m64 -O2 %s -o %t && not %run %t >%t.out 2>&1
-// RUN: FileCheck %s < %t.out
-// RUN: %clangxx_msan -m64 -O3 %s -o %t && not %run %t >%t.out 2>&1
-// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_msan -m64 -O1 %s -o %t && %run %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O2 %s -o %t && %run %t >%t.out 2>&1
+// RUN: %clangxx_msan -m64 -O3 %s -o %t && %run %t >%t.out 2>&1
 
-// Test that (no_sanitize_memory) functions propagate shadow.
-
-// Note that at -O0 there is no report, because 'x' in 'f' is spilled to the
-// stack, and then loaded back as a fully initialiazed value (due to
-// no_sanitize_memory attribute).
+// Test that (no_sanitize_memory) functions DO NOT propagate shadow.
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -25,8 +18,6 @@
   int x;
   int * volatile p = &x;
   int y = f(*p);
-  // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-  // CHECK: {{#0 0x.* in main .*no_sanitize_memory_prop.cc:}}[[@LINE+1]]
   if (y)
     exit(0);
   return 0;
diff --git a/test/msan/print_stats.cc b/test/msan/print_stats.cc
index bee364b..7494383 100644
--- a/test/msan/print_stats.cc
+++ b/test/msan/print_stats.cc
@@ -3,6 +3,8 @@
 // RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTATS %s
 // RUN: MSAN_OPTIONS=print_stats=1 %run %t 2>&1 | \
 // RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-NOSTATS %s
+// RUN: MSAN_OPTIONS=print_stats=1,atexit=1 %run %t 2>&1 | \
+// RUN:   FileCheck --check-prefix=CHECK --check-prefix=CHECK-STATS %s
 
 // RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -m64 -g -DPOSITIVE=1 %s -o %t 
 // RUN: not %run %t 2>&1 | \
@@ -30,8 +32,6 @@
 
 // CHECK: TEST
 
-// CHECK-KEEPGOING: MemorySanitizer: 1 warnings reported.
-
 // CHECK-STATS: Unique heap origins:
 // CHECK-STATS: Stack depot allocated bytes:
 // CHECK-STATS: Unique origin histories:
@@ -41,3 +41,5 @@
 // CHECK-NOSTATS-NOT: Stack depot allocated bytes:
 // CHECK-NOSTATS-NOT: Unique origin histories:
 // CHECK-NOSTATS-NOT: History depot allocated bytes:
+
+// CHECK-KEEPGOING: MemorySanitizer: 1 warnings reported.
diff --git a/test/msan/vector_cvt.cc b/test/msan/vector_cvt.cc
index 6393339..bd9b6a8 100644
--- a/test/msan/vector_cvt.cc
+++ b/test/msan/vector_cvt.cc
@@ -8,7 +8,7 @@
   int x = _mm_cvtsd_si32(t);
   return x;
   // CHECK: WARNING: MemorySanitizer: use-of-uninitialized-value
-  // CHECK: #{{.*}} in to_int{{.*}}vector_cvt.cc:[[@LINE-4]]
+  // CHECK: #{{.*}} in to_int{{.*}}vector_cvt.cc:[[@LINE-3]]
 }
 
 int main() {
diff --git a/test/sanitizer_common/CMakeLists.txt b/test/sanitizer_common/CMakeLists.txt
index dcb09ca..13eecbd 100644
--- a/test/sanitizer_common/CMakeLists.txt
+++ b/test/sanitizer_common/CMakeLists.txt
@@ -10,6 +10,7 @@
 if(CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT ANDROID)
   list(APPEND SUPPORTED_TOOLS tsan)
   list(APPEND SUPPORTED_TOOLS msan)
+  list(APPEND SUPPORTED_TOOLS lsan)
 endif()
 
 # Create a separate config for each tool we support.
diff --git a/test/sanitizer_common/TestCases/Linux/aligned_alloc.c b/test/sanitizer_common/TestCases/Linux/aligned_alloc.c
new file mode 100644
index 0000000..12af18d
--- /dev/null
+++ b/test/sanitizer_common/TestCases/Linux/aligned_alloc.c
@@ -0,0 +1,8 @@
+// RUN: %clang -std=c11 -O0 %s -o %t && %run %t
+#include <stdlib.h>
+extern void *aligned_alloc (size_t alignment, size_t size);
+int main() {
+  volatile void *p = aligned_alloc(128, 1024);
+  free((void*)p);
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/Linux/ptrace.cc b/test/sanitizer_common/TestCases/Linux/ptrace.cc
index 5b6a3e9..797e7b4 100644
--- a/test/sanitizer_common/TestCases/Linux/ptrace.cc
+++ b/test/sanitizer_common/TestCases/Linux/ptrace.cc
@@ -1,5 +1,5 @@
 // RUN: %clangxx -O0 %s -o %t && %run %t
-// XFAIL: arm
+// XFAIL: arm-linux-gnueabi
 
 #include <assert.h>
 #include <signal.h>
diff --git a/test/sanitizer_common/TestCases/malloc_hook.cc b/test/sanitizer_common/TestCases/malloc_hook.cc
new file mode 100644
index 0000000..686e098
--- /dev/null
+++ b/test/sanitizer_common/TestCases/malloc_hook.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx -O2 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+// Malloc/free hooks are not supported on Windows and don't work in LSan.
+// XFAIL: win32, lsan
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sanitizer/allocator_interface.h>
+
+extern "C" {
+const volatile void *global_ptr;
+
+// Note: avoid calling functions that allocate memory in malloc/free
+// to avoid infinite recursion.
+void __sanitizer_malloc_hook(const volatile void *ptr, size_t sz) {
+  if (__sanitizer_get_ownership(ptr)) {
+    write(1, "MallocHook\n", sizeof("MallocHook\n"));
+    global_ptr = ptr;
+  }
+}
+void __sanitizer_free_hook(const volatile void *ptr) {
+  if (__sanitizer_get_ownership(ptr) && ptr == global_ptr)
+    write(1, "FreeHook\n", sizeof("FreeHook\n"));
+}
+}  // extern "C"
+
+int main() {
+  volatile int *x = new int;
+  // CHECK: MallocHook
+  // Check that malloc hook was called with correct argument.
+  if (global_ptr != (void*)x) {
+    _exit(1);
+  }
+  *x = 0;
+  delete x;
+  // CHECK: FreeHook
+  return 0;
+}
diff --git a/test/sanitizer_common/TestCases/print-stack-trace.cc b/test/sanitizer_common/TestCases/print-stack-trace.cc
index 98d9ddf..c84d0da 100644
--- a/test/sanitizer_common/TestCases/print-stack-trace.cc
+++ b/test/sanitizer_common/TestCases/print-stack-trace.cc
@@ -3,7 +3,7 @@
 //
 // Not yet implemented for TSan.
 // https://code.google.com/p/address-sanitizer/issues/detail?id=243
-// XFAIL: tsan
+// XFAIL: tsan,lsan
 
 #include <sanitizer/common_interface_defs.h>
 
diff --git a/test/sanitizer_common/TestCases/pthread_mutexattr_get.cc b/test/sanitizer_common/TestCases/pthread_mutexattr_get.cc
new file mode 100644
index 0000000..26060f3
--- /dev/null
+++ b/test/sanitizer_common/TestCases/pthread_mutexattr_get.cc
@@ -0,0 +1,19 @@
+// RUN: %clangxx -O0 %s -o %t && %run %t
+
+#include <assert.h>
+#include <pthread.h>
+
+int main(void) {
+  pthread_mutexattr_t ma;
+  int res = pthread_mutexattr_init(&ma);
+  assert(res == 0);
+  res = pthread_mutexattr_setpshared(&ma, 1);
+  assert(res == 0);
+  int pshared;
+  res = pthread_mutexattr_getpshared(&ma, &pshared);
+  assert(res == 0);
+  assert(pshared == 1);
+  res = pthread_mutexattr_destroy(&ma);
+  assert(res == 0);
+  return 0;
+}
diff --git a/test/sanitizer_common/lit.common.cfg b/test/sanitizer_common/lit.common.cfg
index 6e2d772..6e768b1 100644
--- a/test/sanitizer_common/lit.common.cfg
+++ b/test/sanitizer_common/lit.common.cfg
@@ -11,6 +11,8 @@
   tool_cflags = ["-fsanitize=thread"]
 elif config.tool_name == "msan":
   tool_cflags = ["-fsanitize=memory"]
+elif config.tool_name == "lsan":
+  tool_cflags = ["-fsanitize=leak"]
 else:
   lit_config.fatal("Unknown tool for sanitizer_common tests: %r" % config.tool_name)
 
diff --git a/test/tsan/CMakeLists.txt b/test/tsan/CMakeLists.txt
index 5b7be0c..29c0821 100644
--- a/test/tsan/CMakeLists.txt
+++ b/test/tsan/CMakeLists.txt
@@ -3,7 +3,7 @@
   list(APPEND TSAN_TEST_DEPS tsan)
 endif()
 if(COMPILER_RT_HAS_LIBCXX_SOURCES AND
-   COMPILER_RT_TEST_COMPILER STREQUAL "Clang")
+   COMPILER_RT_TEST_COMPILER_ID STREQUAL "Clang")
   list(APPEND TSAN_TEST_DEPS libcxx_tsan)
   set(TSAN_HAS_LIBCXX True)
 else()
diff --git a/test/tsan/Helpers/blacklist.txt b/test/tsan/Helpers/blacklist.txt
deleted file mode 100644
index 22225e5..0000000
--- a/test/tsan/Helpers/blacklist.txt
+++ /dev/null
@@ -1 +0,0 @@
-fun:*Blacklisted_Thread2*
diff --git a/test/tsan/Helpers/lit.local.cfg b/test/tsan/Helpers/lit.local.cfg
deleted file mode 100644
index 9246b10..0000000
--- a/test/tsan/Helpers/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-# Files in this directory are helper files for other output tests.
-config.suffixes = []
diff --git a/test/tsan/SharedLibs/lit.local.cfg b/test/tsan/SharedLibs/lit.local.cfg
deleted file mode 100644
index b3677c1..0000000
--- a/test/tsan/SharedLibs/lit.local.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-# Sources in this directory are compiled as shared libraries and used by
-# tests in parent directory.
-
-config.suffixes = []
diff --git a/test/tsan/SharedLibs/load_shared_lib-so.cc b/test/tsan/SharedLibs/load_shared_lib-so.cc
deleted file mode 100644
index f449fe9..0000000
--- a/test/tsan/SharedLibs/load_shared_lib-so.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-//===----------- load_shared_lib-so.cc --------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of ThreadSanitizer (TSan), a race detector.
-//
-//===----------------------------------------------------------------------===//
-
-#include <stddef.h>
-#include <unistd.h>
-
-int GLOB_SHARED = 0;
-
-extern "C"
-void *write_from_so(void *unused) {
-  if (unused)
-    sleep(1);
-  GLOB_SHARED++;
-  return NULL;
-}
diff --git a/test/tsan/atomic_free.cc b/test/tsan/atomic_free.cc
index c36b8fb..1dcf887 100644
--- a/test/tsan/atomic_free.cc
+++ b/test/tsan/atomic_free.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/atomic_free2.cc b/test/tsan/atomic_free2.cc
index 11204ba..c50be6b 100644
--- a/test/tsan/atomic_free2.cc
+++ b/test/tsan/atomic_free2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/atomic_race.cc b/test/tsan/atomic_race.cc
index e64ef96..ca444b4 100644
--- a/test/tsan/atomic_race.cc
+++ b/test/tsan/atomic_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 #include <stdio.h>
diff --git a/test/tsan/atomic_stack.cc b/test/tsan/atomic_stack.cc
index d28b5b9..7e3176f 100644
--- a/test/tsan/atomic_stack.cc
+++ b/test/tsan/atomic_stack.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/blacklist.cc b/test/tsan/blacklist.cc
index ab6f1d1..d6ca383 100644
--- a/test/tsan/blacklist.cc
+++ b/test/tsan/blacklist.cc
@@ -1,8 +1,7 @@
 // Test blacklist functionality for TSan.
 
-// RUN: %clangxx_tsan -O1 %s \
-// RUN:   -fsanitize-blacklist=%p/Helpers/blacklist.txt \
-// RUN:   -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: echo "fun:*Blacklisted_Thread2*" > %t.blacklist
+// RUN: %clangxx_tsan -O1 %s -fsanitize-blacklist=%t.blacklist -o %t && %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 
diff --git a/test/tsan/blacklist2.cc b/test/tsan/blacklist2.cc
new file mode 100644
index 0000000..1258208
--- /dev/null
+++ b/test/tsan/blacklist2.cc
@@ -0,0 +1,49 @@
+// Test that blacklisted functions are still contained in the stack trace.
+
+// RUN: echo "fun:*Blacklisted_Thread2*" > %t.blacklist
+// RUN: echo "fun:*CallTouchGlobal*" >> %t.blacklist
+
+// RUN: %clangxx_tsan -O1 %s -fsanitize-blacklist=%t.blacklist -o %t
+// RUN: %deflake %run %t 2>&1 | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+int Global;
+
+void *Thread1(void *x) {
+  sleep(1);
+  // CHECK: ThreadSanitizer: data race
+  // CHECK: Write of size 4
+  // CHECK: #0 Thread1{{.*}}blacklist2.cc:[[@LINE+1]]
+  Global++;
+  return NULL;
+}
+
+void TouchGlobal() {
+  // CHECK: Previous write of size 4
+  // CHECK: #0 TouchGlobal(){{.*}}blacklist2.cc:[[@LINE+1]]
+  Global--;
+}
+
+void CallTouchGlobal() {
+  // CHECK: #1 CallTouchGlobal{{.*}}blacklist2.cc:[[@LINE+1]]
+  TouchGlobal();
+}
+
+void *Blacklisted_Thread2(void *x) {
+  Global--;
+  // CHECK: #2 Blacklisted_Thread2{{.*}}blacklist2.cc:[[@LINE+1]]
+  CallTouchGlobal();
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Blacklisted_Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  printf("PASS\n");
+  return 0;
+}
diff --git a/test/tsan/cond_race.cc b/test/tsan/cond_race.cc
index 375bd92..fa42faf 100644
--- a/test/tsan/cond_race.cc
+++ b/test/tsan/cond_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 // CHECK-NOT: unlock of unlocked mutex
 // CHECK: ThreadSanitizer: data race
 // CHECK: pthread_cond_signal
diff --git a/test/tsan/deadlock_detector_stress_test.cc b/test/tsan/deadlock_detector_stress_test.cc
index ac9fcbd..5362478 100644
--- a/test/tsan/deadlock_detector_stress_test.cc
+++ b/test/tsan/deadlock_detector_stress_test.cc
@@ -1,12 +1,12 @@
 // RUN: %clangxx_tsan %s -o %t -DLockType=PthreadMutex
-// RUN: TSAN_OPTIONS=detect_deadlocks=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOT-SECOND
-// TSAN_OPTIONS="detect_deadlocks=1 second_deadlock_stack=1" not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SECOND
+// RUN: TSAN_OPTIONS=detect_deadlocks=1 %deflake %run %t | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOT-SECOND
+// TSAN_OPTIONS="detect_deadlocks=1 second_deadlock_stack=1" %deflake %run %t | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SECOND
 // RUN: %clangxx_tsan %s -o %t -DLockType=PthreadSpinLock
-// RUN: TSAN_OPTIONS=detect_deadlocks=1 not %run %t 2>&1 | FileCheck %s
+// RUN: TSAN_OPTIONS=detect_deadlocks=1 %deflake %run %t | FileCheck %s
 // RUN: %clangxx_tsan %s -o %t -DLockType=PthreadRWLock
-// RUN: TSAN_OPTIONS=detect_deadlocks=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RD
+// RUN: TSAN_OPTIONS=detect_deadlocks=1 %deflake %run %t | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-RD
 // RUN: %clangxx_tsan %s -o %t -DLockType=PthreadRecursiveMutex
-// RUN: TSAN_OPTIONS=detect_deadlocks=1 not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-REC
+// RUN: TSAN_OPTIONS=detect_deadlocks=1 %deflake %run %t | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-REC
 #include <pthread.h>
 #undef NDEBUG
 #include <assert.h>
diff --git a/test/tsan/deep_stack1.cc b/test/tsan/deep_stack1.cc
index 617f102..1d00a0e 100644
--- a/test/tsan/deep_stack1.cc
+++ b/test/tsan/deep_stack1.cc
@@ -1,5 +1,5 @@
-// RUN: %clangxx_tsan -O1 %s -o %t -DORDER1 && not %run %t 2>&1 | FileCheck %s
-// RUN: %clangxx_tsan -O1 %s -o %t -DORDER2 && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DORDER1 && %deflake %run %t | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t -DORDER2 && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/deflake.bash b/test/tsan/deflake.bash
new file mode 100755
index 0000000..9731fa5
--- /dev/null
+++ b/test/tsan/deflake.bash
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# This script is used to deflake inherently flaky tsan tests.
+# It is invoked from lit tests as:
+# %deflake mybinary
+# which is then substituted by lit to:
+# $(dirname %s)/deflake.bash mybinary
+# The script runs the target program up to 10 times,
+# until it fails (i.e. produces a race report).
+
+for i in $(seq 1 10); do
+	OUT=$("$@" 2>&1)  # quote "$@" so paths/arguments containing spaces stay intact
+	if [[ $? != 0 ]]; then
+		echo "$OUT"
+		exit 0
+	fi
+done
+exit 1
diff --git a/test/tsan/fd_location.cc b/test/tsan/fd_location.cc
index ebe33be..535329e 100644
--- a/test/tsan/fd_location.cc
+++ b/test/tsan/fd_location.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/fd_pipe_race.cc b/test/tsan/fd_pipe_race.cc
index 384cafa..88c4ed4 100644
--- a/test/tsan/fd_pipe_race.cc
+++ b/test/tsan/fd_pipe_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/fd_stdout_race.cc b/test/tsan/fd_stdout_race.cc
index 47f3c6f..d6a2c7c 100644
--- a/test/tsan/fd_stdout_race.cc
+++ b/test/tsan/fd_stdout_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/free_race.c b/test/tsan/free_race.c
index 040f223..663d7bc 100644
--- a/test/tsan/free_race.c
+++ b/test/tsan/free_race.c
@@ -1,5 +1,5 @@
 // RUN: %clang_tsan -O1 %s -o %t
-// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOZUPP
+// RUN: %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOZUPP
 // RUN: TSAN_OPTIONS="suppressions=%s.supp print_suppressions=1" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-SUPP
 
 #include <pthread.h>
diff --git a/test/tsan/free_race2.c b/test/tsan/free_race2.c
index 2b979f3..de6b2ae 100644
--- a/test/tsan/free_race2.c
+++ b/test/tsan/free_race2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <stdlib.h>
 
 void __attribute__((noinline)) foo(int *mem) {
diff --git a/test/tsan/global_race.cc b/test/tsan/global_race.cc
index d28700c..224ab22 100644
--- a/test/tsan/global_race.cc
+++ b/test/tsan/global_race.cc
@@ -1,44 +1,26 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stddef.h>
 #include <unistd.h>
 
 int GlobalData[10];
-int x;
-namespace XXX {
-  struct YYY {
-    static int ZZZ[10];
-  };
-  int YYY::ZZZ[10];
-}
 
 void *Thread(void *a) {
   sleep(1);
   GlobalData[2] = 42;
-  x = 1;
-  XXX::YYY::ZZZ[0] = 1;
   return 0;
 }
 
 int main() {
   fprintf(stderr, "addr=%p\n", GlobalData);
-  fprintf(stderr, "addr2=%p\n", &x);
-  fprintf(stderr, "addr3=%p\n", XXX::YYY::ZZZ);
   pthread_t t;
   pthread_create(&t, 0, Thread, 0);
   GlobalData[2] = 43;
-  x = 0;
-  XXX::YYY::ZZZ[0] = 0;
   pthread_join(t, 0);
 }
 
 // CHECK: addr=[[ADDR:0x[0-9,a-f]+]]
-// CHECK: addr2=[[ADDR2:0x[0-9,a-f]+]]
-// CHECK: addr3=[[ADDR3:0x[0-9,a-f]+]]
 // CHECK: WARNING: ThreadSanitizer: data race
 // CHECK: Location is global 'GlobalData' of size 40 at [[ADDR]] ({{.*}}+0x{{[0-9,a-f]+}})
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK: Location is global 'x' of size 4 at [[ADDR2]] ({{.*}}+0x{{[0-9,a-f]+}})
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK: Location is global 'XXX::YYY::ZZZ' of size 40 at [[ADDR3]] ({{.*}}+0x{{[0-9,a-f]+}})
+
diff --git a/test/tsan/global_race2.cc b/test/tsan/global_race2.cc
new file mode 100644
index 0000000..b8352ba
--- /dev/null
+++ b/test/tsan/global_race2.cc
@@ -0,0 +1,26 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+int x;
+
+void *Thread(void *a) {
+  sleep(1);
+  x = 1;
+  return 0;
+}
+
+int main() {
+  fprintf(stderr, "addr2=%p\n", &x);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  x = 0;
+  pthread_join(t, 0);
+}
+
+// CHECK: addr2=[[ADDR2:0x[0-9,a-f]+]]
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: Location is global 'x' of size 4 at [[ADDR2]] ({{.*}}+0x{{[0-9,a-f]+}})
+
diff --git a/test/tsan/global_race3.cc b/test/tsan/global_race3.cc
new file mode 100644
index 0000000..e37bf78
--- /dev/null
+++ b/test/tsan/global_race3.cc
@@ -0,0 +1,30 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <unistd.h>
+
+namespace XXX {
+  struct YYY {
+    static int ZZZ[10];
+  };
+  int YYY::ZZZ[10];
+}
+
+void *Thread(void *a) {
+  sleep(1);
+  XXX::YYY::ZZZ[0] = 1;
+  return 0;
+}
+
+int main() {
+  fprintf(stderr, "addr3=%p\n", XXX::YYY::ZZZ);
+  pthread_t t;
+  pthread_create(&t, 0, Thread, 0);
+  XXX::YYY::ZZZ[0] = 0;
+  pthread_join(t, 0);
+}
+
+// CHECK: addr3=[[ADDR3:0x[0-9,a-f]+]]
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: Location is global 'XXX::YYY::ZZZ' of size 40 at [[ADDR3]] ({{.*}}+0x{{[0-9,a-f]+}})
diff --git a/test/tsan/halt_on_error.cc b/test/tsan/halt_on_error.cc
index 4574801..3c55c60 100644
--- a/test/tsan/halt_on_error.cc
+++ b/test/tsan/halt_on_error.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS halt_on_error=1" not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && TSAN_OPTIONS="$TSAN_OPTIONS halt_on_error=1" %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/heap_race.cc b/test/tsan/heap_race.cc
index 63591e0..c3da68f 100644
--- a/test/tsan/heap_race.cc
+++ b/test/tsan/heap_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stddef.h>
diff --git a/test/tsan/ignore_lib0.cc b/test/tsan/ignore_lib0.cc
index d62aa99..fe1a355 100644
--- a/test/tsan/ignore_lib0.cc
+++ b/test/tsan/ignore_lib0.cc
@@ -1,7 +1,7 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib0.so
 // RUN: %clangxx_tsan -O1 %s -L%T -lignore_lib0 -o %t
 // RUN: echo running w/o suppressions:
-// RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOSUPP
+// RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
 // RUN: echo running with suppressions:
 // RUN: LD_LIBRARY_PATH=%T${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
 
diff --git a/test/tsan/ignore_lib1.cc b/test/tsan/ignore_lib1.cc
index b848350..30a9994 100644
--- a/test/tsan/ignore_lib1.cc
+++ b/test/tsan/ignore_lib1.cc
@@ -1,7 +1,7 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib1.so
 // RUN: %clangxx_tsan -O1 %s -o %t
 // RUN: echo running w/o suppressions:
-// RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOSUPP
+// RUN: %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP
 // RUN: echo running with suppressions:
 // RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP
 
diff --git a/test/tsan/ignore_lib2.cc b/test/tsan/ignore_lib2.cc
index 7467ec0..23a0872 100644
--- a/test/tsan/ignore_lib2.cc
+++ b/test/tsan/ignore_lib2.cc
@@ -1,7 +1,7 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib2_0.so
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib2_1.so
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" not %run %t 2>&1 | FileCheck %s
+// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %deflake %run %t | FileCheck %s
 
 // Tests that called_from_lib suppression matched against 2 libraries
 // causes program crash (this is not supported).
diff --git a/test/tsan/ignore_lib3.cc b/test/tsan/ignore_lib3.cc
index 6a5f5cc..137109e 100644
--- a/test/tsan/ignore_lib3.cc
+++ b/test/tsan/ignore_lib3.cc
@@ -1,6 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -DLIB -fPIC -fno-sanitize=thread -shared -o %T/libignore_lib3.so
 // RUN: %clangxx_tsan -O1 %s -o %t
-// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" not %run %t 2>&1 | FileCheck %s
+// RUN: TSAN_OPTIONS="$TSAN_OPTIONS suppressions=%s.supp" %deflake %run %t | FileCheck %s
 
 // Tests that unloading of a library matched against called_from_lib suppression
 // causes program crash (this is not supported).
diff --git a/test/tsan/ignore_sync.cc b/test/tsan/ignore_sync.cc
index ae6edae..ae24a8c 100644
--- a/test/tsan/ignore_sync.cc
+++ b/test/tsan/ignore_sync.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 
diff --git a/test/tsan/inlined_memcpy_race.cc b/test/tsan/inlined_memcpy_race.cc
index 12f82d2..a95576a 100644
--- a/test/tsan/inlined_memcpy_race.cc
+++ b/test/tsan/inlined_memcpy_race.cc
@@ -1,22 +1,17 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 
-int x[4], y[4], z[4];
+int x[4], z[4];
 
 void *MemCpyThread(void *a) {
   memcpy((int*)a, z, 16);
   return NULL;
 }
 
-void *MemMoveThread(void *a) {
-  memmove((int*)a, z, 16);
-  return NULL;
-}
-
 void *MemSetThread(void *a) {
   sleep(1);
   memset((int*)a, 0, 16);
@@ -30,12 +25,6 @@
   pthread_create(&t[1], NULL, MemSetThread, x);
   pthread_join(t[0], NULL);
   pthread_join(t[1], NULL);
-  // Race on y between memmove and memset
-  pthread_create(&t[0], NULL, MemMoveThread, y);
-  pthread_create(&t[1], NULL, MemSetThread, y);
-  pthread_join(t[0], NULL);
-  pthread_join(t[1], NULL);
-
   printf("PASS\n");
   return 0;
 }
@@ -47,9 +36,3 @@
 // CHECK:   #0 memcpy
 // CHECK:   #1 MemCpyThread
 
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK:   #0 memset
-// CHECK:   #1 MemSetThread
-// CHECK:  Previous write
-// CHECK:   #0 memmove
-// CHECK:   #1 MemMoveThread
diff --git a/test/tsan/inlined_memcpy_race2.cc b/test/tsan/inlined_memcpy_race2.cc
new file mode 100644
index 0000000..63b560f
--- /dev/null
+++ b/test/tsan/inlined_memcpy_race2.cc
@@ -0,0 +1,38 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+#include <pthread.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+int y[4], z[4];
+
+void *MemMoveThread(void *a) {
+  memmove((int*)a, z, 16);
+  return NULL;
+}
+
+void *MemSetThread(void *a) {
+  sleep(1);
+  memset((int*)a, 0, 16);
+  return NULL;
+}
+
+int main() {
+  pthread_t t[2];
+  // Race on y between memmove and memset
+  pthread_create(&t[0], NULL, MemMoveThread, y);
+  pthread_create(&t[1], NULL, MemSetThread, y);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+
+  printf("PASS\n");
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK:   #0 memset
+// CHECK:   #1 MemSetThread
+// CHECK:  Previous write
+// CHECK:   #0 memmove
+// CHECK:   #1 MemMoveThread
diff --git a/test/tsan/java.h b/test/tsan/java.h
index 7aa0bca..d986d08 100644
--- a/test/tsan/java.h
+++ b/test/tsan/java.h
@@ -11,6 +11,7 @@
 void __tsan_java_alloc(jptr ptr, jptr size);
 void __tsan_java_free(jptr ptr, jptr size);
 void __tsan_java_move(jptr src, jptr dst, jptr size);
+void __tsan_java_finalize();
 void __tsan_java_mutex_lock(jptr addr);
 void __tsan_java_mutex_unlock(jptr addr);
 void __tsan_java_mutex_read_lock(jptr addr);
diff --git a/test/tsan/java_alloc.cc b/test/tsan/java_alloc.cc
index 0c9c4eb..4a606f7 100644
--- a/test/tsan/java_alloc.cc
+++ b/test/tsan/java_alloc.cc
@@ -19,14 +19,20 @@
 }
 
 int main() {
-  jptr jheap = (jptr)malloc(kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
   __tsan_java_init(jheap, kHeapSize);
   pthread_t th;
   pthread_create(&th, 0, Thread, (void*)(jheap + kHeapSize / 4));
   stress(jheap);
   pthread_join(th, 0);
-  printf("OK\n");
-  return __tsan_java_fini();
+  if (__tsan_java_fini() != 0) {
+    printf("FAILED\n");
+    return 1;
+  }
+  printf("DONE\n");
+  return 0;
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: FAILED
+// CHECK: DONE
diff --git a/test/tsan/java_finalizer.cc b/test/tsan/java_finalizer.cc
new file mode 100644
index 0000000..d5c6a22
--- /dev/null
+++ b/test/tsan/java_finalizer.cc
@@ -0,0 +1,27 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "java.h"
+
+void *Thread(void *p) {  // Thread body: calls __tsan_java_finalize(), then stores 42 to *p.
+  sleep(1);  // Presumably lets main perform its own store to the same address first.
+  __tsan_java_finalize();  // The test expects no race report for the store below.
+  *(int*)p = 42;
+  return 0;
+}
+
+int main() {  // Stores to one java-heap block from main and from Thread; expects DONE and no race report.
+  int const kHeapSize = 1024 * 1024;
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;  // Heap base is offset by 8; the extra 8 is allocated to match.
+  __tsan_java_init(jheap, kHeapSize);
+  const int kBlockSize = 16;
+  __tsan_java_alloc(jheap, kBlockSize);
+  pthread_t th;
+  pthread_create(&th, 0, Thread, (void*)jheap);  // Thread receives the block address as its argument.
+  *(int*)jheap = 43;
+  pthread_join(th, 0);
+  __tsan_java_free(jheap, kBlockSize);
+  fprintf(stderr, "DONE\n");
+  return __tsan_java_fini();  // Exit status is whatever __tsan_java_fini() returns.
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_lock.cc b/test/tsan/java_lock.cc
index e5513cd..36a0f8b 100644
--- a/test/tsan/java_lock.cc
+++ b/test/tsan/java_lock.cc
@@ -15,21 +15,22 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 16;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
-  varaddr = (jptr)jheap;
-  lockaddr = (jptr)jheap + 8;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap;
+  lockaddr = jheap + 8;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
   __tsan_java_mutex_lock(lockaddr);
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
   pthread_join(th, 0);
-  __tsan_java_free((jptr)jheap, kBlockSize);
-  printf("OK\n");
+  __tsan_java_free(jheap, kBlockSize);
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_lock_move.cc b/test/tsan/java_lock_move.cc
index 15a72c7..19c3e35 100644
--- a/test/tsan/java_lock_move.cc
+++ b/test/tsan/java_lock_move.cc
@@ -16,13 +16,13 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 64;
   int const kMove = 1024;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
-  varaddr = (jptr)jheap;
-  lockaddr = (jptr)jheap + 46;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap;
+  lockaddr = jheap + 46;
   varaddr2 = varaddr + kMove;
   lockaddr2 = lockaddr + kMove;
   pthread_t th;
@@ -33,8 +33,9 @@
   __tsan_java_move(varaddr, varaddr2, kBlockSize);
   pthread_join(th, 0);
   __tsan_java_free(varaddr2, kBlockSize);
-  printf("OK\n");
+  printf("DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_lock_rec.cc b/test/tsan/java_lock_rec.cc
index 9223695..2b0ab0e 100644
--- a/test/tsan/java_lock_rec.cc
+++ b/test/tsan/java_lock_rec.cc
@@ -27,13 +27,13 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 16;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
-  varaddr = (jptr)jheap;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap;
   *(int*)varaddr = 0;
-  lockaddr = (jptr)jheap + 8;
+  lockaddr = jheap + 8;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
   sleep(1);
@@ -45,10 +45,11 @@
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
   pthread_join(th, 0);
-  __tsan_java_free((jptr)jheap, kBlockSize);
-  printf("OK\n");
+  __tsan_java_free(jheap, kBlockSize);
+  printf("DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
 // CHECK-NOT: FAILED
+// CHECK: DONE
diff --git a/test/tsan/java_lock_rec_race.cc b/test/tsan/java_lock_rec_race.cc
index 41aa1ca..841aa39 100644
--- a/test/tsan/java_lock_rec_race.cc
+++ b/test/tsan/java_lock_rec_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include "java.h"
 #include <unistd.h>
 
@@ -25,13 +25,13 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 16;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
-  varaddr = (jptr)jheap;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap;
   *(int*)varaddr = 0;
-  lockaddr = (jptr)jheap + 8;
+  lockaddr = jheap + 8;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
   sleep(1);
@@ -39,10 +39,11 @@
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
   pthread_join(th, 0);
-  __tsan_java_free((jptr)jheap, kBlockSize);
-  printf("OK\n");
+  __tsan_java_free(jheap, kBlockSize);
+  printf("DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
 // CHECK-NOT: FAILED
+// CHECK: DONE
diff --git a/test/tsan/java_move_overlap.cc b/test/tsan/java_move_overlap.cc
new file mode 100644
index 0000000..12955b4
--- /dev/null
+++ b/test/tsan/java_move_overlap.cc
@@ -0,0 +1,72 @@
+// RUN: %clangxx_tsan -O1 %s -o %t
+// RUN: %run %t 2>&1 | FileCheck %s
+// RUN: %run %t arg 2>&1 | FileCheck %s
+#include "java.h"
+
+jptr varaddr1_old;
+jptr varaddr2_old;
+jptr lockaddr1_old;
+jptr lockaddr2_old;
+jptr varaddr1_new;
+jptr varaddr2_new;
+jptr lockaddr1_new;
+jptr lockaddr2_new;
+
+void *Thread(void *p) {  // Thread body: writes both variables at their post-move addresses, each under its post-move lock.
+  sleep(1);  // Presumably lets main finish its writes and the move first.
+  __tsan_java_mutex_lock(lockaddr1_new);
+  *(char*)varaddr1_new = 43;
+  __tsan_java_mutex_unlock(lockaddr1_new);
+  __tsan_java_mutex_lock(lockaddr2_new);
+  *(char*)varaddr2_new = 43;
+  __tsan_java_mutex_unlock(lockaddr2_new);
+  return 0;
+}
+
+int main(int argc, char **argv) {  // Locked writes before and after an overlapping move; any extra argument selects a backward move.
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize + 8);  // +8: the heap base is shifted by 8 below, so keep the registered range inside the allocation.
+  jheap = (char*)jheap + 8;
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 64;
+  int const kMove = 32;  // Smaller than kBlockSize, so source and destination ranges overlap.
+  varaddr1_old = (jptr)jheap;
+  lockaddr1_old = (jptr)jheap + 1;
+  varaddr2_old = (jptr)jheap + kBlockSize - 1;  // Last byte of the block.
+  lockaddr2_old = (jptr)jheap + kBlockSize - 16;
+  varaddr1_new = varaddr1_old + kMove;
+  lockaddr1_new = lockaddr1_old + kMove;
+  varaddr2_new = varaddr2_old + kMove;
+  lockaddr2_new = lockaddr2_old + kMove;
+  if (argc > 1) {
+    // Move memory backwards.
+    varaddr1_old += kMove;
+    lockaddr1_old += kMove;
+    varaddr2_old += kMove;
+    lockaddr2_old += kMove;
+    varaddr1_new -= kMove;
+    lockaddr1_new -= kMove;
+    varaddr2_new -= kMove;
+    lockaddr2_new -= kMove;
+  }
+  __tsan_java_alloc(varaddr1_old, kBlockSize);
+
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+
+  __tsan_java_mutex_lock(lockaddr1_old);
+  *(char*)varaddr1_old = 43;
+  __tsan_java_mutex_unlock(lockaddr1_old);
+  __tsan_java_mutex_lock(lockaddr2_old);
+  *(char*)varaddr2_old = 43;
+  __tsan_java_mutex_unlock(lockaddr2_old);
+
+  __tsan_java_move(varaddr1_old, varaddr1_new, kBlockSize);
+  pthread_join(th, 0);
+  __tsan_java_free(varaddr1_new, kBlockSize);
+  printf("DONE\n");
+  return __tsan_java_fini();  // Exit status is whatever __tsan_java_fini() returns.
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_move_overlap_race.cc b/test/tsan/java_move_overlap_race.cc
new file mode 100644
index 0000000..2b3769b
--- /dev/null
+++ b/test/tsan/java_move_overlap_race.cc
@@ -0,0 +1,53 @@
+// RUN: %clangxx_tsan -O1 %s -o %t
+// RUN: %deflake %run %t | FileCheck %s
+// RUN: %deflake %run %t arg | FileCheck %s
+#include "java.h"
+
+jptr varaddr1_old;
+jptr varaddr2_old;
+jptr varaddr1_new;
+jptr varaddr2_new;
+
+void *Thread(void *p) {  // Thread body: unsynchronized stores to both variables at their post-move addresses.
+  sleep(1);  // Presumably lets main finish its stores and the move first.
+  *(int*)varaddr1_new = 43;
+  *(int*)varaddr2_new = 43;
+  return 0;
+}
+
+int main(int argc, char **argv) {  // Unlocked writes before and after an overlapping move; any extra argument selects a backward move.
+  int const kHeapSize = 1024 * 1024;
+  void *jheap = malloc(kHeapSize + 8);  // +8: the heap base is shifted by 8 below, so keep the registered range inside the allocation.
+  jheap = (char*)jheap + 8;
+  __tsan_java_init((jptr)jheap, kHeapSize);
+  const int kBlockSize = 64;
+  int const kMove = 32;  // Smaller than kBlockSize, so source and destination ranges overlap.
+  varaddr1_old = (jptr)jheap;
+  varaddr2_old = (jptr)jheap + kBlockSize - 1;
+  varaddr1_new = varaddr1_old + kMove;
+  varaddr2_new = varaddr2_old + kMove;
+  if (argc > 1) {
+    // Move memory backwards.
+    varaddr1_old += kMove;
+    varaddr2_old += kMove;
+    varaddr1_new -= kMove;
+    varaddr2_new -= kMove;
+  }
+  __tsan_java_alloc(varaddr1_old, kBlockSize);
+
+  pthread_t th;
+  pthread_create(&th, 0, Thread, 0);
+
+  *(int*)varaddr1_old = 43;  // No lock is taken here or in Thread; the test expects two race reports.
+  *(int*)varaddr2_old = 43;
+
+  __tsan_java_move(varaddr1_old, varaddr1_new, kBlockSize);
+  pthread_join(th, 0);
+  __tsan_java_free(varaddr1_new, kBlockSize);
+  printf("DONE\n");
+  return __tsan_java_fini();  // Exit status is whatever __tsan_java_fini() returns.
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_race.cc b/test/tsan/java_race.cc
index 70ad8c4..ede058e 100644
--- a/test/tsan/java_race.cc
+++ b/test/tsan/java_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include "java.h"
 
 void *Thread(void *p) {
@@ -8,16 +8,18 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 16;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
+  __tsan_java_alloc(jheap, kBlockSize);
   pthread_t th;
-  pthread_create(&th, 0, Thread, jheap);
+  pthread_create(&th, 0, Thread, (void*)jheap);
   *(int*)jheap = 43;
   pthread_join(th, 0);
-  __tsan_java_free((jptr)jheap, kBlockSize);
+  __tsan_java_free(jheap, kBlockSize);
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_race_move.cc b/test/tsan/java_race_move.cc
index 76da91b..8a51be9 100644
--- a/test/tsan/java_race_move.cc
+++ b/test/tsan/java_race_move.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include "java.h"
 
 jptr varaddr;
@@ -12,12 +12,12 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 64;
   int const kMove = 1024;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
-  varaddr = (jptr)jheap + 16;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap + 16;
   varaddr2 = varaddr + kMove;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
@@ -25,7 +25,9 @@
   __tsan_java_move(varaddr, varaddr2, kBlockSize);
   pthread_join(th, 0);
   __tsan_java_free(varaddr2, kBlockSize);
+  fprintf(stderr, "DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/java_rwlock.cc b/test/tsan/java_rwlock.cc
index d43dfe1..b03afa6 100644
--- a/test/tsan/java_rwlock.cc
+++ b/test/tsan/java_rwlock.cc
@@ -15,21 +15,22 @@
 
 int main() {
   int const kHeapSize = 1024 * 1024;
-  void *jheap = malloc(kHeapSize);
-  __tsan_java_init((jptr)jheap, kHeapSize);
+  jptr jheap = (jptr)malloc(kHeapSize + 8) + 8;
+  __tsan_java_init(jheap, kHeapSize);
   const int kBlockSize = 16;
-  __tsan_java_alloc((jptr)jheap, kBlockSize);
-  varaddr = (jptr)jheap;
-  lockaddr = (jptr)jheap + 8;
+  __tsan_java_alloc(jheap, kBlockSize);
+  varaddr = jheap;
+  lockaddr = jheap + 8;
   pthread_t th;
   pthread_create(&th, 0, Thread, 0);
   __tsan_java_mutex_lock(lockaddr);
   *(int*)varaddr = 43;
   __tsan_java_mutex_unlock(lockaddr);
   pthread_join(th, 0);
-  __tsan_java_free((jptr)jheap, kBlockSize);
-  printf("OK\n");
+  __tsan_java_free(jheap, kBlockSize);
+  printf("DONE\n");
   return __tsan_java_fini();
 }
 
 // CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
diff --git a/test/tsan/lit.cfg b/test/tsan/lit.cfg
index ae21971..f7051b3 100644
--- a/test/tsan/lit.cfg
+++ b/test/tsan/lit.cfg
@@ -24,7 +24,7 @@
 
 # GCC driver doesn't add necessary compile/link flags with -fsanitize=thread.
 if config.compiler_id == 'GNU':
-  extra_cflags = ["-fPIE", "-lpthread", "-ldl", "-lstdc++", "-lrt", "-pie"]
+  extra_cflags = ["-fPIE", "-pthread", "-ldl", "-lstdc++", "-lrt", "-pie"]
 else:
   extra_cflags = []
 
@@ -57,6 +57,8 @@
 # Define CHECK-%os to check for OS-dependent output.
 config.substitutions.append( ('CHECK-%os', ("CHECK-" + config.host_os)))
 
+config.substitutions.append( ("%deflake ", os.path.join(os.path.dirname(__file__), "deflake.bash") + " ") )  # Re-add the space the pattern consumes so the script path never fuses with the next token.
+
 # Default test suffixes.
 config.suffixes = ['.c', '.cc', '.cpp']
 
diff --git a/test/tsan/load_shared_lib.cc b/test/tsan/load_shared_lib.cc
index 2b250e3..a27dc1c 100644
--- a/test/tsan/load_shared_lib.cc
+++ b/test/tsan/load_shared_lib.cc
@@ -2,9 +2,25 @@
 // reports, the second report occurring in a new shared library is still
 // symbolized correctly.
 
-// RUN: %clangxx_tsan -O1 %p/SharedLibs/load_shared_lib-so.cc \
-// RUN:     -fPIC -shared -o %t-so.so
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -DBUILD_SO -fPIC -shared -o %t-so.so
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+
+#ifdef BUILD_SO
+
+#include <stddef.h>
+#include <unistd.h>
+
+int GLOB_SHARED = 0;
+
+extern "C"  // C linkage keeps the symbol name unmangled (presumably for lookup through dlfcn).
+void *write_from_so(void *unused) {
+  if (unused)  // A non-null argument only selects the delayed variant; it is not dereferenced.
+    sleep(1);
+  GLOB_SHARED++;  // Unsynchronized increment of the shared-library global.
+  return NULL;
+}
+
+#else  // BUILD_SO
 
 #include <dlfcn.h>
 #include <pthread.h>
@@ -46,3 +62,5 @@
   // CHECK: write_from_so
   return 0;
 }
+
+#endif  // BUILD_SO
diff --git a/test/tsan/longjmp3.cc b/test/tsan/longjmp3.cc
index afb4996..71d964d 100644
--- a/test/tsan/longjmp3.cc
+++ b/test/tsan/longjmp3.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/tsan/longjmp4.cc b/test/tsan/longjmp4.cc
index 1ec3b4b..15330f5 100644
--- a/test/tsan/longjmp4.cc
+++ b/test/tsan/longjmp4.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/tsan/malloc_hook.cc b/test/tsan/malloc_hook.cc
deleted file mode 100644
index 63e1c3c..0000000
--- a/test/tsan/malloc_hook.cc
+++ /dev/null
@@ -1,52 +0,0 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stddef.h>
-
-static int malloc_count;
-static int free_count;
-
-extern "C" {
-void __tsan_malloc_hook(void *ptr, size_t size) {
-  (void)ptr;
-  (void)size;
-  __sync_fetch_and_add(&malloc_count, 1);
-}
-
-void __tsan_free_hook(void *ptr) {
-  (void)ptr;
-  __sync_fetch_and_add(&free_count, 1);
-}
-}
-
-void *Thread1(void *x) {
-  ((int*)x)[0]++;
-  return 0;
-}
-
-void *Thread2(void *x) {
-  sleep(1);
-  ((int*)x)[0]++;
-  return 0;
-}
-
-int main() {
-  int *x = new int;
-  pthread_t t[2];
-  pthread_create(&t[0], 0, Thread1, x);
-  pthread_create(&t[1], 0, Thread2, x);
-  pthread_join(t[0], 0);
-  pthread_join(t[1], 0);
-  delete x;
-  if (malloc_count == 0 || free_count == 0) {
-    fprintf(stderr, "FAILED %d %d\n", malloc_count, free_count);
-    exit(1);
-  }
-  fprintf(stderr, "DONE\n");
-}
-
-// CHECK: WARNING: ThreadSanitizer: data race
-// CHECK-NOT: FAILED
-// CHECK: DONE
diff --git a/test/tsan/malloc_stack.cc b/test/tsan/malloc_stack.cc
index cea6020..6027360 100644
--- a/test/tsan/malloc_stack.cc
+++ b/test/tsan/malloc_stack.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/memcpy_race.cc b/test/tsan/memcpy_race.cc
index d7314d2..8ec8e0a 100644
--- a/test/tsan/memcpy_race.cc
+++ b/test/tsan/memcpy_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <stdio.h>
diff --git a/test/tsan/mop_with_offset.cc b/test/tsan/mop_with_offset.cc
index e934279..e44c78b 100644
--- a/test/tsan/mop_with_offset.cc
+++ b/test/tsan/mop_with_offset.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <stdio.h>
diff --git a/test/tsan/mop_with_offset2.cc b/test/tsan/mop_with_offset2.cc
index 31b4a40..a465d5f 100644
--- a/test/tsan/mop_with_offset2.cc
+++ b/test/tsan/mop_with_offset2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <stdio.h>
diff --git a/test/tsan/mutex_bad_read_lock.cc b/test/tsan/mutex_bad_read_lock.cc
index c64bf88..84a2976 100644
--- a/test/tsan/mutex_bad_read_lock.cc
+++ b/test/tsan/mutex_bad_read_lock.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 extern "C" void AnnotateRWLockAcquired(const char *f, int l, void *m, long rw);
 
 int main() {
diff --git a/test/tsan/mutex_bad_read_unlock.cc b/test/tsan/mutex_bad_read_unlock.cc
index ab417f4..dcee515 100644
--- a/test/tsan/mutex_bad_read_unlock.cc
+++ b/test/tsan/mutex_bad_read_unlock.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 extern "C" void AnnotateRWLockAcquired(const char *f, int l, void *m, long rw);
 extern "C" void AnnotateRWLockReleased(const char *f, int l, void *m, long rw);
 
diff --git a/test/tsan/mutex_bad_unlock.cc b/test/tsan/mutex_bad_unlock.cc
index 3019035..6b483cf 100644
--- a/test/tsan/mutex_bad_unlock.cc
+++ b/test/tsan/mutex_bad_unlock.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 extern "C" void AnnotateRWLockReleased(const char *f, int l, void *m, long rw);
 
 int main() {
diff --git a/test/tsan/mutex_cycle2.c b/test/tsan/mutex_cycle2.c
index fb2b533..031830d 100644
--- a/test/tsan/mutex_cycle2.c
+++ b/test/tsan/mutex_cycle2.c
@@ -1,10 +1,13 @@
 // RUN: %clangxx_tsan %s -o %t
+// RUN:                                 not %run %t 2>&1 | FileCheck %s
 // RUN: TSAN_OPTIONS=detect_deadlocks=1 not %run %t 2>&1 | FileCheck %s
-// RUN: echo "deadlock:main" > sup
-// RUN: TSAN_OPTIONS="detect_deadlocks=1 suppressions=sup" %run %t
-// RUN: echo "deadlock:zzzz" > sup
-// RUN: TSAN_OPTIONS="detect_deadlocks=1 suppressions=sup" not %run %t 2>&1 | FileCheck %s
+// RUN: TSAN_OPTIONS=detect_deadlocks=0     %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: echo "deadlock:main" > %t.sup
+// RUN: TSAN_OPTIONS="suppressions=%t.sup" %run %t 2>&1 | FileCheck %s --check-prefix=DISABLED
+// RUN: echo "deadlock:zzzz" > %t.sup
+// RUN: TSAN_OPTIONS="suppressions=%t.sup" not %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
+#include <stdio.h>
 
 int main() {
   pthread_mutex_t mu1, mu2;
@@ -21,9 +24,12 @@
   pthread_mutex_lock(&mu2);
   pthread_mutex_lock(&mu1);
   // CHECK: ThreadSanitizer: lock-order-inversion (potential deadlock)
+  // DISABLED-NOT: ThreadSanitizer
+  // DISABLED: PASS
   pthread_mutex_unlock(&mu1);
   pthread_mutex_unlock(&mu2);
 
   pthread_mutex_destroy(&mu1);
   pthread_mutex_destroy(&mu2);
+  fprintf(stderr, "PASS\n");
 }
diff --git a/test/tsan/mutex_destroy_locked.cc b/test/tsan/mutex_destroy_locked.cc
index 6c5dcbf..b81905e 100644
--- a/test/tsan/mutex_destroy_locked.cc
+++ b/test/tsan/mutex_destroy_locked.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/mutex_double_lock.cc b/test/tsan/mutex_double_lock.cc
index 551d911..c1bebf7 100644
--- a/test/tsan/mutex_double_lock.cc
+++ b/test/tsan/mutex_double_lock.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/mutex_robust2.cc b/test/tsan/mutex_robust2.cc
index f3125c1..0914c17 100644
--- a/test/tsan/mutex_robust2.cc
+++ b/test/tsan/mutex_robust2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/test/tsan/mutexset1.cc b/test/tsan/mutexset1.cc
index 49607d7..72964ed 100644
--- a/test/tsan/mutexset1.cc
+++ b/test/tsan/mutexset1.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/mutexset2.cc b/test/tsan/mutexset2.cc
index 80b690d..01a5f5d 100644
--- a/test/tsan/mutexset2.cc
+++ b/test/tsan/mutexset2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/mutexset3.cc b/test/tsan/mutexset3.cc
index 17c27ea..e14bb11 100644
--- a/test/tsan/mutexset3.cc
+++ b/test/tsan/mutexset3.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/mutexset4.cc b/test/tsan/mutexset4.cc
index f56d37a..db860e0 100644
--- a/test/tsan/mutexset4.cc
+++ b/test/tsan/mutexset4.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/mutexset5.cc b/test/tsan/mutexset5.cc
index ea12553..e1cc2fc 100644
--- a/test/tsan/mutexset5.cc
+++ b/test/tsan/mutexset5.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/mutexset6.cc b/test/tsan/mutexset6.cc
index a42a743..07dcc0a 100644
--- a/test/tsan/mutexset6.cc
+++ b/test/tsan/mutexset6.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/mutexset7.cc b/test/tsan/mutexset7.cc
index a8a907a..1217484 100644
--- a/test/tsan/mutexset7.cc
+++ b/test/tsan/mutexset7.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -13,12 +13,13 @@
 }
 
 void *Thread2(void *x) {
-  pthread_mutex_t mtx;
-  pthread_mutex_init(&mtx, 0);
-  pthread_mutex_lock(&mtx);
+  pthread_mutex_t *mtx = new pthread_mutex_t;
+  pthread_mutex_init(mtx, 0);
+  pthread_mutex_lock(mtx);
   Global--;
-  pthread_mutex_unlock(&mtx);
-  pthread_mutex_destroy(&mtx);
+  pthread_mutex_unlock(mtx);
+  pthread_mutex_destroy(mtx);
+  delete mtx;
   return NULL;
 }
 
diff --git a/test/tsan/mutexset8.cc b/test/tsan/mutexset8.cc
index 23449ab..3e1ab8c 100644
--- a/test/tsan/mutexset8.cc
+++ b/test/tsan/mutexset8.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/oob_race.cc b/test/tsan/oob_race.cc
deleted file mode 100644
index 16c59c6..0000000
--- a/test/tsan/oob_race.cc
+++ /dev/null
@@ -1,26 +0,0 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
-#include <pthread.h>
-#include <stdio.h>
-#include <unistd.h>
-
-const long kOffset = 64*1024;
-
-void *Thread(void *p) {
-  sleep(1);
-  ((char*)p)[-kOffset] = 43;
-  return 0;
-}
-
-int main() {
-  char *volatile p0 = new char[16];
-  delete[] p0;
-  char *p = new char[32];
-  pthread_t th;
-  pthread_create(&th, 0, Thread, p);
-  p[-kOffset] = 42;
-  pthread_join(th, 0);
-}
-
-// Used to crash with CHECK failed.
-// CHECK: WARNING: ThreadSanitizer: data race
-
diff --git a/test/tsan/pthread_atfork_deadlock.c b/test/tsan/pthread_atfork_deadlock.c
index 3e91f3e..965de05 100644
--- a/test/tsan/pthread_atfork_deadlock.c
+++ b/test/tsan/pthread_atfork_deadlock.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 // Regression test for
 // https://code.google.com/p/thread-sanitizer/issues/detail?id=61
 // When the data race was reported, pthread_atfork() handler used to be
diff --git a/test/tsan/race_on_barrier.c b/test/tsan/race_on_barrier.c
index e93995a..99b18fe 100644
--- a/test/tsan/race_on_barrier.c
+++ b/test/tsan/race_on_barrier.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stddef.h>
diff --git a/test/tsan/race_on_barrier2.c b/test/tsan/race_on_barrier2.c
index 75f9b4a..98c028e 100644
--- a/test/tsan/race_on_barrier2.c
+++ b/test/tsan/race_on_barrier2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stddef.h>
diff --git a/test/tsan/race_on_heap.cc b/test/tsan/race_on_heap.cc
index 54c4a9b..a66e0c4 100644
--- a/test/tsan/race_on_heap.cc
+++ b/test/tsan/race_on_heap.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -38,7 +38,7 @@
 // CHECK: WARNING: ThreadSanitizer: data race
 // ...
 // CHECK: Location is heap block of size 99 at [[ADDR]] allocated by thread T1:
-// CHCEKL     #0 malloc
+// CHECK:     #0 malloc
 // CHECK:     #{{1|2}} alloc
 // CHECK:     #{{2|3}} AllocThread
 // ...
diff --git a/test/tsan/race_on_mutex.c b/test/tsan/race_on_mutex.c
index d3e6210..b4adeeb 100644
--- a/test/tsan/race_on_mutex.c
+++ b/test/tsan/race_on_mutex.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stddef.h>
diff --git a/test/tsan/race_on_mutex2.c b/test/tsan/race_on_mutex2.c
index c860fc6..1796d0c 100644
--- a/test/tsan/race_on_mutex2.c
+++ b/test/tsan/race_on_mutex2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stddef.h>
diff --git a/test/tsan/race_on_puts.cc b/test/tsan/race_on_puts.cc
new file mode 100644
index 0000000..1f2b4db
--- /dev/null
+++ b/test/tsan/race_on_puts.cc
@@ -0,0 +1,29 @@
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+#include <pthread.h>
+#include <stdio.h>
+#include <unistd.h>
+
+char s[] = "abracadabra";
+
+void *Thread0(void *p) {  // Thread body: passes the global string s to puts().
+  puts(s);
+  return 0;
+}
+
+void *Thread1(void *p) {  // Thread body: overwrites one character of the global string s.
+  s[3] = 'z';
+  return 0;
+}
+
+int main() {  // Runs the puts() reader and the writer of s concurrently, then prints DONE to stderr.
+  pthread_t th[2];
+  pthread_create(&th[0], 0, Thread0, 0);
+  pthread_create(&th[1], 0, Thread1, 0);
+  pthread_join(th[0], 0);
+  pthread_join(th[1], 0);
+  fprintf(stderr, "DONE\n");  // Newline-terminated, like the other DONE/PASS markers in these tests.
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: DONE
+
diff --git a/test/tsan/race_on_read.cc b/test/tsan/race_on_read.cc
index 4dc976d..1ec0522 100644
--- a/test/tsan/race_on_read.cc
+++ b/test/tsan/race_on_read.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/race_on_write.cc b/test/tsan/race_on_write.cc
index add22ce..484bbb7 100644
--- a/test/tsan/race_on_write.cc
+++ b/test/tsan/race_on_write.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/race_with_finished_thread.cc b/test/tsan/race_with_finished_thread.cc
index b3f9b70..d287600 100644
--- a/test/tsan/race_with_finished_thread.cc
+++ b/test/tsan/race_with_finished_thread.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <stdio.h>
diff --git a/test/tsan/signal_errno.cc b/test/tsan/signal_errno.cc
index 326953e..27d4ecd 100644
--- a/test/tsan/signal_errno.cc
+++ b/test/tsan/signal_errno.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/tsan/signal_malloc.cc b/test/tsan/signal_malloc.cc
index f6fc3fa..db5e79b 100644
--- a/test/tsan/signal_malloc.cc
+++ b/test/tsan/signal_malloc.cc
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
diff --git a/test/tsan/sigsuspend.cc b/test/tsan/sigsuspend.cc
index dc96066..f614c12 100644
--- a/test/tsan/sigsuspend.cc
+++ b/test/tsan/sigsuspend.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 
 // Always enable asserts.
 #ifdef NDEBUG
diff --git a/test/tsan/simple_race.c b/test/tsan/simple_race.c
index 78f105f..7b60c5e 100644
--- a/test/tsan/simple_race.c
+++ b/test/tsan/simple_race.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -26,3 +26,4 @@
 }
 
 // CHECK: WARNING: ThreadSanitizer: data race
+
diff --git a/test/tsan/simple_race.cc b/test/tsan/simple_race.cc
index 9010ed5..0236b9f 100644
--- a/test/tsan/simple_race.cc
+++ b/test/tsan/simple_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 
diff --git a/test/tsan/simple_stack.c b/test/tsan/simple_stack.c
index f2326eb..899277f 100644
--- a/test/tsan/simple_stack.c
+++ b/test/tsan/simple_stack.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/simple_stack2.cc b/test/tsan/simple_stack2.cc
index 080387a..ba0303c 100644
--- a/test/tsan/simple_stack2.cc
+++ b/test/tsan/simple_stack2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/sleep_sync.cc b/test/tsan/sleep_sync.cc
index 5047fdf..c7614e1 100644
--- a/test/tsan/sleep_sync.cc
+++ b/test/tsan/sleep_sync.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/sleep_sync2.cc b/test/tsan/sleep_sync2.cc
index 7739405..4e61699 100644
--- a/test/tsan/sleep_sync2.cc
+++ b/test/tsan/sleep_sync2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/stack_race.cc b/test/tsan/stack_race.cc
index 90da2f8..2e02f46 100644
--- a/test/tsan/stack_race.cc
+++ b/test/tsan/stack_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <unistd.h>
diff --git a/test/tsan/stack_race2.cc b/test/tsan/stack_race2.cc
index 83a67f4..818db36 100644
--- a/test/tsan/stack_race2.cc
+++ b/test/tsan/stack_race2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <unistd.h>
diff --git a/test/tsan/static_init3.cc b/test/tsan/static_init3.cc
index 94934b4..3b9fe62 100644
--- a/test/tsan/static_init3.cc
+++ b/test/tsan/static_init3.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdlib.h>
 #include <stdio.h>
diff --git a/test/tsan/suppress_same_address.cc b/test/tsan/suppress_same_address.cc
index c172140..df19da1 100644
--- a/test/tsan/suppress_same_address.cc
+++ b/test/tsan/suppress_same_address.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/suppress_same_stacks.cc b/test/tsan/suppress_same_stacks.cc
index 23008b4..9305650 100644
--- a/test/tsan/suppress_same_stacks.cc
+++ b/test/tsan/suppress_same_stacks.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 
 volatile int N;  // Prevent loop unrolling.
diff --git a/test/tsan/test_output.sh b/test/tsan/test_output.sh
index 8a15a67..8b286f4 100755
--- a/test/tsan/test_output.sh
+++ b/test/tsan/test_output.sh
@@ -5,7 +5,6 @@
 
 HERE=$(dirname $0)
 TSAN_DIR=$(dirname $0)/../../lib/tsan
-BLACKLIST=$HERE/Helpers/blacklist.txt
 
 # Assume clang and clang++ are in path.
 : ${CC:=clang}
@@ -13,8 +12,8 @@
 : ${FILECHECK:=FileCheck}
 
 # TODO: add testing for all of -O0...-O3
-CFLAGS="-fsanitize=thread -fsanitize-blacklist=$BLACKLIST -fPIE -O1 -g -Wall"
-LDFLAGS="-pie -lpthread -ldl -lrt -lm -Wl,--whole-archive $TSAN_DIR/rtl/libtsan.a -Wl,--no-whole-archive"
+CFLAGS="-fsanitize=thread -fPIE -O1 -g -Wall"
+LDFLAGS="-pie -pthread -ldl -lrt -lm -Wl,--whole-archive $TSAN_DIR/rtl/libtsan.a -Wl,--no-whole-archive"
 
 test_file() {
   SRC=$1
@@ -41,6 +40,10 @@
       echo TEST $c is not supported
       continue
     fi
+    if [[ $c == */*blacklist*.cc ]]; then
+      echo TEST $c is not supported
+      continue
+    fi
     if [ "`grep "TSAN_OPTIONS" $c`" ]; then
       echo SKIPPING $c -- requires TSAN_OPTIONS
       continue
diff --git a/test/tsan/thread_end_with_ignore.cc b/test/tsan/thread_end_with_ignore.cc
index 438a738..79bb08d 100644
--- a/test/tsan/thread_end_with_ignore.cc
+++ b/test/tsan/thread_end_with_ignore.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 
diff --git a/test/tsan/thread_end_with_ignore2.cc b/test/tsan/thread_end_with_ignore2.cc
index ca9b5ca..9387ea4 100644
--- a/test/tsan/thread_end_with_ignore2.cc
+++ b/test/tsan/thread_end_with_ignore2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 extern "C" void AnnotateIgnoreWritesBegin(const char *f, int l);
 
 int main() {
diff --git a/test/tsan/thread_end_with_ignore3.cc b/test/tsan/thread_end_with_ignore3.cc
index f066f9e..55688b2 100644
--- a/test/tsan/thread_end_with_ignore3.cc
+++ b/test/tsan/thread_end_with_ignore3.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 extern "C" void AnnotateIgnoreReadsBegin(const char *f, int l);
 extern "C" void AnnotateIgnoreReadsEnd(const char *f, int l);
 
diff --git a/test/tsan/thread_leak3.c b/test/tsan/thread_leak3.c
index efc4dd0..f4db484 100644
--- a/test/tsan/thread_leak3.c
+++ b/test/tsan/thread_leak3.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/thread_leak5.c b/test/tsan/thread_leak5.c
index 130858c..ca244a9 100644
--- a/test/tsan/thread_leak5.c
+++ b/test/tsan/thread_leak5.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/thread_name.cc b/test/tsan/thread_name.cc
index af29a2e..05b0a35 100644
--- a/test/tsan/thread_name.cc
+++ b/test/tsan/thread_name.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/thread_name2.cc b/test/tsan/thread_name2.cc
index 0530c98..b9a5746 100644
--- a/test/tsan/thread_name2.cc
+++ b/test/tsan/thread_name2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <unistd.h>
diff --git a/test/tsan/tiny_race.c b/test/tsan/tiny_race.c
index 9d84224..c10eab1 100644
--- a/test/tsan/tiny_race.c
+++ b/test/tsan/tiny_race.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/tsan/tls_race.cc b/test/tsan/tls_race.cc
index dbfc553..1858934 100644
--- a/test/tsan/tls_race.cc
+++ b/test/tsan/tls_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <unistd.h>
diff --git a/test/tsan/tls_race2.cc b/test/tsan/tls_race2.cc
index ed9f4d0..0ca629a 100644
--- a/test/tsan/tls_race2.cc
+++ b/test/tsan/tls_race2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stddef.h>
 #include <unistd.h>
diff --git a/test/tsan/unaligned_race.cc b/test/tsan/unaligned_race.cc
index a2b2675..6e9b5a3 100644
--- a/test/tsan/unaligned_race.cc
+++ b/test/tsan/unaligned_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/test/tsan/vptr_harmful_race.cc b/test/tsan/vptr_harmful_race.cc
index c7d4b74..68e12e8 100644
--- a/test/tsan/vptr_harmful_race.cc
+++ b/test/tsan/vptr_harmful_race.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <semaphore.h>
 #include <stdio.h>
diff --git a/test/tsan/vptr_harmful_race2.cc b/test/tsan/vptr_harmful_race2.cc
index b018d5c..aa53bbb 100644
--- a/test/tsan/vptr_harmful_race2.cc
+++ b/test/tsan/vptr_harmful_race2.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <semaphore.h>
 #include <stdio.h>
diff --git a/test/tsan/vptr_harmful_race3.cc b/test/tsan/vptr_harmful_race3.cc
index 01c94e2..ac6ea94 100644
--- a/test/tsan/vptr_harmful_race3.cc
+++ b/test/tsan/vptr_harmful_race3.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <semaphore.h>
 #include <stdio.h>
diff --git a/test/tsan/write_in_reader_lock.cc b/test/tsan/write_in_reader_lock.cc
index 77373be..5588213 100644
--- a/test/tsan/write_in_reader_lock.cc
+++ b/test/tsan/write_in_reader_lock.cc
@@ -1,4 +1,4 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && not %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 #include <pthread.h>
 #include <unistd.h>
 
diff --git a/test/ubsan/CMakeLists.txt b/test/ubsan/CMakeLists.txt
index ded124d..4c6b0bc 100644
--- a/test/ubsan/CMakeLists.txt
+++ b/test/ubsan/CMakeLists.txt
@@ -4,11 +4,15 @@
 configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
   ${CMAKE_CURRENT_BINARY_DIR}/UbsanConfig/lit.site.cfg)
+set(UBSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/UbsanConfig)
 
-set(UBSAN_LIT_TEST_MODE "AddressSanitizer")
-configure_lit_site_cfg(
-  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
-  ${CMAKE_CURRENT_BINARY_DIR}/AsanConfig/lit.site.cfg)
+if(ASAN_SUPPORTED_ARCH)
+  set(UBSAN_LIT_TEST_MODE "AddressSanitizer")
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+    ${CMAKE_CURRENT_BINARY_DIR}/AsanConfig/lit.site.cfg)
+  list(APPEND UBSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/AsanConfig)
+endif()
 
 set(UBSAN_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS})
 if(NOT COMPILER_RT_STANDALONE_BUILD)
@@ -16,7 +20,6 @@
 endif()
 
 add_lit_testsuite(check-ubsan "Running UndefinedBehaviorSanitizer tests"
-  ${CMAKE_CURRENT_BINARY_DIR}/UbsanConfig
-  ${CMAKE_CURRENT_BINARY_DIR}/AsanConfig
+  ${UBSAN_TESTSUITES}
   DEPENDS ${UBSAN_TEST_DEPS})
 set_target_properties(check-ubsan PROPERTIES FOLDER "UBSan unittests")