am 09049311: am cfff36df: am a60ff6c5: Merge "libc: Define new symbol visibility macros"

* commit '09049311a229c427f73e3e0ac873bf344b45aaf2':
  libc: Define new symbol visibility macros
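The subject line refers to the symbol visibility macros used by the pthread.c changes further down, where the new pthread_mutex_*_impl entry points are tagged __LIBC_HIDDEN__ to keep them out of the exported ABI. As a hedged sketch only (the real definition lives in a bionic private header that is not part of this diff), such a macro typically wraps the GCC visibility attribute:

    #include <pthread.h>

    /* Hypothetical sketch of a libc visibility macro; the actual bionic
     * definition is not shown in this diff. */
    #if defined(__GNUC__)
    #  define __LIBC_HIDDEN__ __attribute__((visibility("hidden")))
    #else
    #  define __LIBC_HIDDEN__ /* no-op without ELF visibility support */
    #endif

    /* Usage, as in the pthread.c hunks below: the _impl worker is hidden,
     * while the public pthread_mutex_lock wrapper stays exported. */
    __LIBC_HIDDEN__ int pthread_mutex_lock_impl(pthread_mutex_t *mutex);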
diff --git a/libc/Android.mk b/libc/Android.mk
index 9881d59..1ce2feb 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -346,13 +346,15 @@
 	arch-arm/bionic/__get_sp.S \
 	arch-arm/bionic/_exit_with_stack_teardown.S \
 	arch-arm/bionic/_setjmp.S \
-	arch-arm/bionic/atomics_arm.S \
+	arch-arm/bionic/atomics_arm.c \
 	arch-arm/bionic/clone.S \
 	arch-arm/bionic/eabi.c \
 	arch-arm/bionic/ffs.S \
+	arch-arm/bionic/futex_arm.S \
 	arch-arm/bionic/kill.S \
 	arch-arm/bionic/libgcc_compat.c \
 	arch-arm/bionic/tkill.S \
+	arch-arm/bionic/tgkill.S \
 	arch-arm/bionic/memcmp.S \
 	arch-arm/bionic/memcmp16.S \
 	arch-arm/bionic/memcpy.S \
@@ -394,9 +396,9 @@
 	arch-x86/bionic/__get_sp.S \
 	arch-x86/bionic/__get_tls.c \
 	arch-x86/bionic/__set_tls.c \
-	arch-x86/bionic/atomics_x86.S \
 	arch-x86/bionic/clone.S \
 	arch-x86/bionic/_exit_with_stack_teardown.S \
+	arch-x86/bionic/futex_x86.S \
 	arch-x86/bionic/setjmp.S \
 	arch-x86/bionic/_setjmp.S \
 	arch-x86/bionic/sigsetjmp.S \
@@ -474,7 +476,12 @@
 else # !arm
   ifeq ($(TARGET_ARCH),x86)
     libc_crt_target_cflags :=
-    # TARGET_GLOBAL_CFLAGS from build/core/combo/TARGET_linux-x86.mk sets all required flags.
+    ifeq ($(ARCH_X86_HAVE_SSE2),true)
+        libc_crt_target_cflags += -DUSE_SSE2=1
+    endif
+    ifeq ($(ARCH_X86_HAVE_SSSE3),true)
+        libc_crt_target_cflags += -DUSE_SSSE3=1
+    endif
   endif # x86
 endif # !arm
 
@@ -640,7 +647,13 @@
 # ========================================================
 include $(CLEAR_VARS)
 
-LOCAL_CFLAGS := $(libc_common_cflags)
+# pthread deadlock prediction:
+# set -DPTHREAD_DEBUG -DPTHREAD_DEBUG_ENABLED=1 to enable support for
+# pthread deadlock prediction.
+# Since this code is experimental, it is disabled by default.
+# See libc/bionic/pthread_debug.c for details.
+
+LOCAL_CFLAGS := $(libc_common_cflags) -DPTHREAD_DEBUG -DPTHREAD_DEBUG_ENABLED=0
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
 
 LOCAL_SRC_FILES := \
@@ -648,6 +661,7 @@
 	$(libc_static_common_src_files) \
 	bionic/dlmalloc.c \
 	bionic/malloc_debug_common.c \
+	bionic/pthread_debug.c \
 	bionic/libc_init_dynamic.c
 
 LOCAL_MODULE:= libc
@@ -718,7 +732,7 @@
 
 LOCAL_MODULE:= libc_malloc_debug_qemu
 
-LOCAL_SHARED_LIBRARIES := libc
+LOCAL_SHARED_LIBRARIES := libc libdl
 LOCAL_WHOLE_STATIC_LIBRARIES := libc_common
 LOCAL_SYSTEM_SHARED_LIBRARIES :=
 
diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 3190b12..b9bd9e7 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -64,6 +64,7 @@
 # see comments in arch-arm/bionic/kill.S to understand why we don't generate an ARM stub for kill/tkill
 int     kill(pid_t, int)           -1,37
 int     tkill(pid_t tid, int sig)  -1,238
+int     tgkill(pid_t tgid, pid_t tid, int sig)  -1,270
 int     __ptrace:ptrace(int request, int pid, void* addr, void* data)  26
 int     __set_thread_area:set_thread_area(void*  user_desc)  -1,243
 int     __getpriority:getpriority(int, int)  96
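The tgkill() system call added here takes the thread-group (process) id in addition to the thread id, which avoids the tkill() hazard of a recycled tid landing in an unrelated process. A minimal usage sketch, with illustrative names and signal choice:

    #include <signal.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Declaration matching the SYSCALLS.TXT entry above; the stubs in
     * this patch provide the implementation. */
    extern int tgkill(pid_t tgid, pid_t tid, int sig);

    /* Send SIGUSR1 to a single thread of the current process; 'tid'
     * would come from gettid() in the target thread. Returns 0 on
     * success, or -1 with errno set. */
    int signal_one_thread(pid_t tid) {
        return tgkill(getpid(), tid, SIGUSR1);
    }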
diff --git a/libc/arch-arm/bionic/atomics_arm.S b/libc/arch-arm/bionic/atomics_arm.S
deleted file mode 100644
index 4d9cbcf..0000000
--- a/libc/arch-arm/bionic/atomics_arm.S
+++ /dev/null
@@ -1,236 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include <sys/linux-syscalls.h>
-#include <machine/asm.h>
-#include <machine/cpu-features.h>
-
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-
-#if defined(__ARM_HAVE_LDREX_STREX)
-/*
- * ===========================================================================
- *      ARMv6+ implementation
- * ===========================================================================
- */
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_dec)
-    mov     r1, r0                      @ copy addr so we don't clobber it
-1:  ldrex   r0, [r1]                    @ load current value into r0
-    sub     r2, r0, #1                  @ generate new value into r2
-    strex   r3, r2, [r1]                @ try to store new value; result in r3
-    cmp     r3, #0                      @ success?
-    bxeq    lr                          @ yes, return
-    b       1b                          @ no, retry
-END(__atomic_dec)
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_inc)
-    mov     r1, r0
-1:  ldrex   r0, [r1]
-    add     r2, r0, #1
-    strex   r3, r2, [r1]
-    cmp     r3, #0
-    bxeq    lr
-    b       1b
-END(__atomic_inc)
-
-/* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-ENTRY(__atomic_cmpxchg)
-1:  mov     ip, #2                      @ ip=2 means "new != old"
-    ldrex   r3, [r2]                    @ load current value into r3
-    teq     r0, r3                      @ new == old?
-    strexeq ip, r1, [r2]                @ yes, try store, set ip to 0 or 1
-    teq     ip, #1                      @ strex failure?
-    beq     1b                          @ yes, retry
-    mov     r0, ip                      @ return 0 on success, 2 on failure
-    bx      lr
-END(__atomic_cmpxchg)
-
-/* r0(new) r1(addr) -> r0(old) */
-ENTRY(__atomic_swap)
-1:  ldrex   r2, [r1]
-    strex   r3, r0, [r1]
-    teq     r3, #0
-    bne     1b
-    mov     r0, r2
-    bx      lr
-END(__atomic_swap)
-
-#else /*not defined __ARM_HAVE_LDREX_STREX*/
-/*
- * ===========================================================================
- *      Pre-ARMv6 implementation
- * ===========================================================================
- */
-
-    /* int __kernel_cmpxchg(int oldval, int newval, int* ptr) */
-    .equ    kernel_cmpxchg, 0xFFFF0FC0
-    .equ    kernel_atomic_base, 0xFFFF0FFF
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_dec)
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0
-1: @ atomic_dec
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-    add     lr, pc, #4
-    sub     r1, r0, #1
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    bcc     1b
-    add     r0, r1, #1
-    ldmia   sp!, {r4, lr}
-    bx      lr
-END(__atomic_dec)
-
-/* r0(addr) -> r0(old) */
-ENTRY(__atomic_inc)
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r2, r0
-1: @ atomic_inc
-    ldr     r0, [r2]
-    mov     r3, #kernel_atomic_base
-    add     lr, pc, #4
-    add     r1, r0, #1
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    bcc     1b
-    sub     r0, r1, #1
-    ldmia   sp!, {r4, lr}
-    bx      lr
-END(__atomic_inc)
-
-/* r0(old) r1(new) r2(addr) -> r0(zero_if_succeeded) */
-ENTRY(__atomic_cmpxchg)
-    .save {r4, lr}
-    stmdb   sp!, {r4, lr}
-    mov     r4, r0          /* r4 = save oldvalue */
-1: @ atomic_cmpxchg
-    mov     r3, #kernel_atomic_base
-    add     lr, pc, #4
-    mov     r0, r4          /* r0 = oldvalue */
-    add     pc, r3, #(kernel_cmpxchg - kernel_atomic_base)
-    bcs     2f              /* swap was made. we're good, return. */
-    ldr     r3, [r2]        /* swap not made, see if it's because *ptr!=oldvalue */
-    cmp     r3, r4
-    beq     1b
-2: @ atomic_cmpxchg
-    ldmia   sp!, {r4, lr}
-    bx      lr
-END(__atomic_cmpxchg)
-
-/* r0(new) r1(addr) -> r0(old) */
-ENTRY(__atomic_swap)
-    swp     r0, r0, [r1]
-    bx      lr
-END(__atomic_swap)
-
-#endif /*not defined __ARM_HAVE_LDREX_STREX*/
-
-
-/* __futex_wait(*ftx, val, *timespec) */
-/* __futex_wake(*ftx, counter) */
-/* __futex_syscall3(*ftx, op, val) */
-/* __futex_syscall4(*ftx, op, val, *timespec) */
-
-.global __futex_wait
-.type __futex_wait, %function
-
-.global __futex_wake
-.type __futex_wake, %function
-
-.global __futex_syscall3
-.type __futex_syscall3, %function
-
-.global __futex_syscall4
-.type __futex_syscall4, %function
-
-#if __ARM_EABI__
-
-ENTRY(__futex_syscall3)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
-    ldr     r7, =__NR_futex
-    swi     #0
-    ldmia   sp!, {r4, r7}
-    bx      lr
-END(__futex_syscall3)
-
-ENTRY(__futex_wait)
-    stmdb   sp!, {r4, r7}
-    .save   {r4, r7}
-    mov     r3, r2
-    mov     r2, r1
-    mov     r1, #FUTEX_WAIT
-    ldr     r7, =__NR_futex
-    swi     #0
-    ldmia   sp!, {r4, r7}
-    bx      lr
-END(__futex_wait)
-
-ENTRY(__futex_wake)
-    .save   {r4, r7}
-    stmdb   sp!, {r4, r7}
-    mov     r2, r1
-    mov     r1, #FUTEX_WAKE
-    ldr     r7, =__NR_futex
-    swi     #0
-    ldmia   sp!, {r4, r7}
-    bx      lr
-END(__futex_wake)
-
-#else
-
-ENTRY(__futex_syscall3)
-    swi     #__NR_futex
-    bx      lr
-END(__futex_syscall3)
-
-ENTRY(__futex_wait)
-    mov     r3, r2
-    mov     r2, r1
-    mov     r1, #FUTEX_WAIT
-    swi     #__NR_futex
-    bx      lr
-END(__futex_wait)
-
-ENTRY(__futex_wake)
-    mov     r2, r1
-    mov     r1, #FUTEX_WAKE
-    swi     #__NR_futex
-    bx      lr
-END(__futex_wake)
-
-#endif
-
-ENTRY(__futex_syscall4)
-    b __futex_syscall3
-END(__futex_syscall4)
diff --git a/libc/arch-arm/bionic/atomics_arm.c b/libc/arch-arm/bionic/atomics_arm.c
new file mode 100644
index 0000000..d69eaff
--- /dev/null
+++ b/libc/arch-arm/bionic/atomics_arm.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/* The purpose of this file is to export a small set of atomic-related
+ * functions from the C library, to ensure binary ABI compatibility for
+ * the NDK.
+ *
+ * These functions were initially exposed by the NDK through <sys/atomics.h>,
+ * which was unfortunate because their implementation didn't provide any
+ * memory barriers at all.
+ *
+ * This wasn't a problem for the platform code that used them, because it
+ * used explicit barrier instructions around them. On the other hand, it means
+ * that any NDK-generated machine code that linked against them would not
+ * perform correctly when running on multi-core devices.
+ *
+ * To fix this, the platform code was first modified to not use any of these
+ * functions (everything is now inlined through assembly statements, see
+ * libc/private/bionic_arm_inline.h and the headers it includes).
+ *
+ * The functions here are thus only for the benefit of NDK applications,
+ * and now include full memory barriers to prevent any random memory ordering
+ * issue from cropping up.
+ *
+ * Note that we also provide an updated <sys/atomics.h> header that defines
+ * always-inlined versions of the functions, which use the GCC builtin
+ * intrinsics to achieve the same effect.
+ *
+ * NOTE: There is no need for a similar file for non-ARM platforms.
+ */
+
+/* DO NOT INCLUDE <sys/atomics.h> HERE ! */
+
+int
+__atomic_cmpxchg(int old, int _new, volatile int *ptr)
+{
+    /* We must return 0 on success */
+    return __sync_val_compare_and_swap(ptr, old, _new) != old;
+}
+
+int
+__atomic_swap(int _new, volatile int *ptr)
+{
+    int prev;
+    do {
+        prev = *ptr;
+    } while (__sync_val_compare_and_swap(ptr, prev, _new) != prev);
+    return prev;
+}
+
+int
+__atomic_dec(volatile int *ptr)
+{
+    return __sync_fetch_and_sub(ptr, 1);
+}
+
+int
+__atomic_inc(volatile int *ptr)
+{
+    return __sync_fetch_and_add(ptr, 1);
+}
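For reference, the updated <sys/atomics.h> mentioned above would pair each exported function with an always-inlined equivalent built on the same GCC __sync builtins. A hedged sketch with illustrative names (the actual header contents are not part of this diff):

    /* Inline counterpart of __atomic_cmpxchg: 0 on success, with a full
     * barrier provided by the builtin. */
    static __inline__ __attribute__((always_inline))
    int atomic_cmpxchg_inline(int old, int _new, volatile int *ptr) {
        return __sync_val_compare_and_swap(ptr, old, _new) != old;
    }

    /* Inline counterpart of __atomic_inc: returns the previous value. */
    static __inline__ __attribute__((always_inline))
    int atomic_inc_inline(volatile int *ptr) {
        return __sync_fetch_and_add(ptr, 1);
    }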
diff --git a/libc/arch-arm/bionic/crtbegin_dynamic.S b/libc/arch-arm/bionic/crtbegin_dynamic.S
index 0999084..6ca0845 100644
--- a/libc/arch-arm/bionic/crtbegin_dynamic.S
+++ b/libc/arch-arm/bionic/crtbegin_dynamic.S
@@ -43,21 +43,17 @@
 #    - address of an "onexit" function, not used on any
 #      platform supported by Bionic
 #
-#    - address of the "main" function of the program. We
-#      can't hard-code it in the adr pseudo instruction
-#      so we use a tiny trampoline that will get relocated
-#      by the dynamic linker before this code runs
+#    - address of the "main" function of the program.
 #
 #    - address of the constructor list
 #
 _start:	
 	mov	r0, sp
 	mov	r1, #0
-	adr r2, 0f
-	adr r3, 1f
-	b	__libc_init
-
-0:  b   main
+	ldr	r2, =main
+	adr	r3, 1f
+	ldr	r4, =__libc_init
+	bx	r4
 
 1:  .long   __PREINIT_ARRAY__
     .long   __INIT_ARRAY__
diff --git a/libc/arch-arm/bionic/crtbegin_static.S b/libc/arch-arm/bionic/crtbegin_static.S
index 13b05b2..e62ac1d 100644
--- a/libc/arch-arm/bionic/crtbegin_static.S
+++ b/libc/arch-arm/bionic/crtbegin_static.S
@@ -43,21 +43,17 @@
 #    - address of an "onexit" function, not used on any
 #      platform supported by Bionic
 #
-#    - address of the "main" function of the program. We
-#      can't hard-code it in the adr pseudo instruction
-#      so we use a tiny trampoline that will get relocated
-#      by the dynamic linker before this code runs
+#    - address of the "main" function of the program.
 #
 #    - address of the constructor list
 #
 _start:	
 	mov	r0, sp
 	mov	r1, #0
-	adr r2, 0f
-	adr r3, 1f
-	b	__libc_init
-
-0:  b   main
+	ldr	r2, =main
+	adr	r3, 1f
+	ldr	r4, =__libc_init
+	bx	r4
 
 1:  .long   __PREINIT_ARRAY__
     .long   __INIT_ARRAY__
diff --git a/libc/arch-arm/bionic/futex_arm.S b/libc/arch-arm/bionic/futex_arm.S
new file mode 100644
index 0000000..7041663
--- /dev/null
+++ b/libc/arch-arm/bionic/futex_arm.S
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include <sys/linux-syscalls.h>
+#include <machine/asm.h>
+#include <machine/cpu-features.h>
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+
+/* __futex_wait(*ftx, val, *timespec) */
+/* __futex_wake(*ftx, counter) */
+/* __futex_syscall3(*ftx, op, val) */
+/* __futex_syscall4(*ftx, op, val, *timespec) */
+
+.global __futex_wait
+.type __futex_wait, %function
+
+.global __futex_wake
+.type __futex_wake, %function
+
+.global __futex_syscall3
+.type __futex_syscall3, %function
+
+.global __futex_syscall4
+.type __futex_syscall4, %function
+
+#if __ARM_EABI__
+
+ENTRY(__futex_syscall3)
+    stmdb   sp!, {r4, r7}
+    .save   {r4, r7}
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmia   sp!, {r4, r7}
+    bx      lr
+END(__futex_syscall3)
+
+ENTRY(__futex_wait)
+    stmdb   sp!, {r4, r7}
+    .save   {r4, r7}
+    mov     r3, r2
+    mov     r2, r1
+    mov     r1, #FUTEX_WAIT
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmia   sp!, {r4, r7}
+    bx      lr
+END(__futex_wait)
+
+ENTRY(__futex_wake)
+    .save   {r4, r7}
+    stmdb   sp!, {r4, r7}
+    mov     r2, r1
+    mov     r1, #FUTEX_WAKE
+    ldr     r7, =__NR_futex
+    swi     #0
+    ldmia   sp!, {r4, r7}
+    bx      lr
+END(__futex_wake)
+
+#else
+
+ENTRY(__futex_syscall3)
+    swi     #__NR_futex
+    bx      lr
+END(__futex_syscall3)
+
+ENTRY(__futex_wait)
+    mov     r3, r2
+    mov     r2, r1
+    mov     r1, #FUTEX_WAIT
+    swi     #__NR_futex
+    bx      lr
+END(__futex_wait)
+
+ENTRY(__futex_wake)
+    mov     r2, r1
+    mov     r1, #FUTEX_WAKE
+    swi     #__NR_futex
+    bx      lr
+END(__futex_wake)
+
+#endif
+
+ENTRY(__futex_syscall4)
+    b __futex_syscall3
+END(__futex_syscall4)
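From C, callers would declare these wrappers along the lines of the signature comments at the top of the file. A hedged sketch of the call side (prototypes inferred from those comments; the real declarations live in a bionic private header):

    #include <time.h>   /* struct timespec */

    extern int __futex_wait(volatile void *ftx, int val, const struct timespec *timeout);
    extern int __futex_wake(volatile void *ftx, int count);

    /* Typical pattern: sleep while *word still holds 'expected'; the
     * updater wakes one sleeper after changing the value. The kernel
     * rechecks the value atomically, so a wake cannot be lost. */
    static void wait_for_change(volatile int *word, int expected) {
        while (*word == expected)
            __futex_wait(word, expected, 0 /* no timeout */);
    }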
diff --git a/libc/arch-arm/bionic/tgkill.S b/libc/arch-arm/bionic/tgkill.S
new file mode 100644
index 0000000..da5c0af
--- /dev/null
+++ b/libc/arch-arm/bionic/tgkill.S
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/* Unlike our auto-generated syscall stubs, this code saves lr
+   on the stack, as well as a few other registers. This makes
+   our stack unwinder happy when we generate debug stack
+   traces after the C library or other parts of the system
+   abort due to a fatal runtime error (e.g. detection
+   of a corrupted malloc heap).
+*/
+
+#include <sys/linux-syscalls.h>
+#include <machine/asm.h>
+
+#ifndef __NR_tgkill
+#define __NR_tgkill  268
+#endif
+
+ENTRY(tgkill)
+    stmfd   sp!, {r4-r7, ip, lr}
+    ldr     r7, =__NR_tgkill
+    swi     #0
+    ldmfd   sp!, {r4-r7, ip, lr}
+    movs    r0, r0
+    bxpl    lr
+    b       __set_syscall_errno
+END(tgkill)
diff --git a/libc/arch-sh/syscalls/tgkill.S b/libc/arch-sh/syscalls/tgkill.S
new file mode 100644
index 0000000..222f836
--- /dev/null
+++ b/libc/arch-sh/syscalls/tgkill.S
@@ -0,0 +1,32 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+
+    .text
+    .type tgkill, @function
+    .globl tgkill
+    .align 4
+
+tgkill:
+
+    /* invoke trap */
+    mov.l   0f, r3  /* trap num */
+    trapa   #(3 + 0x10)
+
+    /* check return value */
+    cmp/pz  r0
+    bt      __NR_tgkill_end
+
+    /* keep error number */
+    sts.l   pr, @-r15
+    mov.l   1f, r1
+    jsr     @r1
+    mov     r0, r4
+    lds.l   @r15+, pr
+
+__NR_tgkill_end:
+    rts
+    nop
+
+    .align  2
+0:  .long   __NR_tgkill
+1:  .long   __set_syscall_errno
diff --git a/libc/arch-x86/bionic/atomics_x86.S b/libc/arch-x86/bionic/futex_x86.S
similarity index 100%
rename from libc/arch-x86/bionic/atomics_x86.S
rename to libc/arch-x86/bionic/futex_x86.S
diff --git a/libc/arch-x86/string/sse2-memset5-atom.S b/libc/arch-x86/string/sse2-memset5-atom.S
index 4b7f71b..557c019 100644
--- a/libc/arch-x86/string/sse2-memset5-atom.S
+++ b/libc/arch-x86/string/sse2-memset5-atom.S
@@ -93,7 +93,7 @@
 # define SETRTNVAL	movl DEST(%esp), %eax
 #endif
 
-#ifdef SHARED
+#if (defined SHARED || defined __PIC__)
 # define ENTRANCE	PUSH (%ebx);
 # define RETURN_END	POP (%ebx); ret
 # define RETURN		RETURN_END; CFI_PUSH (%ebx)
@@ -313,7 +313,7 @@
 	PUSH (%ebx)
 	mov	$SHARED_CACHE_SIZE, %ebx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
@@ -331,7 +331,7 @@
 # define RESTORE_EBX_STATE CFI_PUSH (%ebx)
 	cmp	$DATA_CACHE_SIZE, %ecx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 #  define RESTORE_EBX_STATE
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
@@ -436,7 +436,7 @@
 	jae	L(128bytesormore_nt)
 	sfence
 L(shared_cache_loop_end):
-#if defined DATA_CACHE_SIZE || !defined SHARED
+#if defined DATA_CACHE_SIZE || !(defined SHARED || defined __PIC__)
 	POP (%ebx)
 #endif
 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
diff --git a/libc/arch-x86/string/ssse3-memcpy5.S b/libc/arch-x86/string/ssse3-memcpy5.S
index b4773df..1bf6d22 100644
--- a/libc/arch-x86/string/ssse3-memcpy5.S
+++ b/libc/arch-x86/string/ssse3-memcpy5.S
@@ -104,7 +104,7 @@
 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
 #define POP(REG)	popl REG; CFI_POP (REG)
 
-#ifdef SHARED
+#if (defined SHARED || defined __PIC__)
 # define PARMS		8		/* Preserve EBX.  */
 # define ENTRANCE	PUSH (%ebx);
 # define RETURN_END	POP (%ebx); ret
@@ -216,7 +216,7 @@
 #ifdef SHARED_CACHE_SIZE_HALF
 	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
@@ -287,7 +287,7 @@
 #ifdef DATA_CACHE_SIZE_HALF
 	cmp	$DATA_CACHE_SIZE_HALF, %ecx
 #else
-# ifdef SHARED
+# if (defined SHARED || defined __PIC__)
 	call	__i686.get_pc_thunk.bx
 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
 	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
diff --git a/libc/arch-x86/syscalls.mk b/libc/arch-x86/syscalls.mk
index 7d3d937..d2ceb42 100644
--- a/libc/arch-x86/syscalls.mk
+++ b/libc/arch-x86/syscalls.mk
@@ -27,6 +27,7 @@
 syscall_src += arch-x86/syscalls/__brk.S
 syscall_src += arch-x86/syscalls/kill.S
 syscall_src += arch-x86/syscalls/tkill.S
+syscall_src += arch-x86/syscalls/tgkill.S
 syscall_src += arch-x86/syscalls/__ptrace.S
 syscall_src += arch-x86/syscalls/__set_thread_area.S
 syscall_src += arch-x86/syscalls/__getpriority.S
diff --git a/libc/arch-x86/syscalls/tgkill.S b/libc/arch-x86/syscalls/tgkill.S
new file mode 100644
index 0000000..99af740
--- /dev/null
+++ b/libc/arch-x86/syscalls/tgkill.S
@@ -0,0 +1,29 @@
+/* autogenerated by gensyscalls.py */
+#include <sys/linux-syscalls.h>
+
+    .text
+    .type tgkill, @function
+    .globl tgkill
+    .align 4
+
+tgkill:
+    pushl   %ebx
+    pushl   %ecx
+    pushl   %edx
+    mov     16(%esp), %ebx
+    mov     20(%esp), %ecx
+    mov     24(%esp), %edx
+    movl    $__NR_tgkill, %eax
+    int     $0x80
+    cmpl    $-129, %eax
+    jb      1f
+    negl    %eax
+    pushl   %eax
+    call    __set_errno
+    addl    $4, %esp
+    orl     $-1, %eax
+1:
+    popl    %edx
+    popl    %ecx
+    popl    %ebx
+    ret
diff --git a/libc/bionic/atomics_x86.c b/libc/bionic/atomics_x86.c
deleted file mode 100644
index fd60f4f..0000000
--- a/libc/bionic/atomics_x86.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-#include <sys/atomics.h>
-
-#define FUTEX_SYSCALL 240
-#define FUTEX_WAIT 0
-#define FUTEX_WAKE 1
-
-int __futex_wait(volatile void *ftx, int val)
-{
-    int ret;
-    asm volatile (
-        "int $0x80;"
-        : "=a" (ret)
-        : "0" (FUTEX_SYSCALL),
-          "b" (ftx),
-          "c" (FUTEX_WAIT),
-          "d" (val),
-          "S" (0)
-    );
-    return ret;
-}
-
-int __futex_wake(volatile void *ftx, int count)
-{
-    int ret;
-    asm volatile (
-        "int $0x80;"
-        : "=a" (ret)
-        : "0" (FUTEX_SYSCALL),
-          "b" (ftx),
-          "c" (FUTEX_WAKE),
-          "d" (count)
-    );
-    return ret;
-}
-
-int __atomic_cmpxchg(int old, int new, volatile int* addr) {
-    int xchg;
-    asm volatile (
-        "lock;"
-        "cmpxchg %%ecx, (%%edx);"
-        "setne %%al;"
-        : "=a" (xchg)
-        : "a" (old),
-          "c" (new),
-          "d" (addr)
-    );
-    return xchg;
-}
-
-int __atomic_swap(int new, volatile int* addr) {
-    int old;
-    asm volatile (
-        "lock;"
-        "xchg %%ecx, (%%edx);"
-        : "=c" (old)
-        : "c" (new),
-          "d" (addr)
-    );
-    return old;
-}
-
-int __atomic_dec(volatile int* addr) {
-    int old;
-    do {
-        old = *addr;
-    } while (atomic_cmpxchg(old, old-1, addr));
-    return old;
-}
diff --git a/libc/bionic/dlmalloc.c b/libc/bionic/dlmalloc.c
index 8c75e9c..496cd1c 100644
--- a/libc/bionic/dlmalloc.c
+++ b/libc/bionic/dlmalloc.c
@@ -774,6 +774,22 @@
 void* dlmemalign(size_t, size_t);
 
 /*
+  int posix_memalign(void **memptr, size_t alignment, size_t size);
+  Places a pointer to a newly allocated chunk of size bytes, aligned
+  in accord with the alignment argument, in *memptr.
+
+  The return value is 0 on success, and ENOMEM on failure.
+
+  The alignment argument should be a power of two. If the argument is
+  not a power of two, the nearest greater power is used.
+  8-byte alignment is guaranteed by normal malloc calls, so don't
+  bother calling memalign with an argument of 8 or less.
+
+  Overreliance on posix_memalign is a sure way to fragment space.
+*/
+int posix_memalign(void **memptr, size_t alignment, size_t size);
+
+/*
   valloc(size_t n);
   Equivalent to memalign(pagesize, n), where pagesize is the page
   size of the system. If the pagesize is unknown, 4096 is used.
@@ -4507,6 +4523,18 @@
   return internal_memalign(gm, alignment, bytes);
 }
 
+int posix_memalign(void **memptr, size_t alignment, size_t size) {
+  int ret = 0;
+
+  *memptr = dlmemalign(alignment, size);
+
+  if (*memptr == 0) {
+    ret = ENOMEM;
+  }
+
+  return ret;
+}
+
 void** dlindependent_calloc(size_t n_elements, size_t elem_size,
                                  void* chunks[]) {
   size_t sz = elem_size; /* serves as 1-element array */
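A hedged usage sketch for the posix_memalign() added above; per the surrounding comments this implementation returns 0 or ENOMEM, and rounds a non-power-of-two alignment up rather than rejecting it (names and sizes are illustrative):

    #include <stdlib.h>

    /* Allocate 1 KiB on a 64-byte boundary. */
    void *aligned_block(void) {
        void *p = NULL;
        if (posix_memalign(&p, 64, 1024) != 0)
            return NULL;    /* the only failure here is ENOMEM */
        return p;           /* release with free(p) */
    }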
diff --git a/libc/bionic/libc_init_dynamic.c b/libc/bionic/libc_init_dynamic.c
index 4bb2a81..1c8480c 100644
--- a/libc/bionic/libc_init_dynamic.c
+++ b/libc/bionic/libc_init_dynamic.c
@@ -76,6 +76,12 @@
 
     __libc_init_common(elfdata);
 
+    /* Set up pthread routines according to the environment.
+     * This requires system properties.
+     */
+    extern void pthread_debug_init(void);
+    pthread_debug_init();
+
     /* Setup malloc routines accordingly to the environment.
      * Requires system properties
      */
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index 3435d21..d9ca432 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -51,6 +51,9 @@
 #include <stdio.h>
 #include <bionic_pthread.h>
 
+extern void pthread_debug_mutex_lock_check(pthread_mutex_t *mutex);
+extern void pthread_debug_mutex_unlock_check(pthread_mutex_t *mutex);
+
 extern int  __pthread_clone(int (*fn)(void*), void *child_stack, int flags, void *arg);
 extern void _exit_with_stack_teardown(void * stackBase, int stackSize, int retCode);
 extern void _exit_thread(int  retCode);
@@ -81,9 +84,6 @@
 #define PTHREAD_ATTR_FLAG_USER_STACK    0x00000002
 
 #define DEFAULT_STACKSIZE (1024 * 1024)
-#define STACKBASE 0x10000000
-
-static uint8_t * gStackBase = (uint8_t *)STACKBASE;
 
 static pthread_mutex_t mmap_lock = PTHREAD_MUTEX_INITIALIZER;
 
@@ -252,7 +252,7 @@
 
     pthread_mutex_lock(&mmap_lock);
 
-    stack = mmap((void *)gStackBase, size,
+    stack = mmap(NULL, size,
                  PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
                  -1, 0);
@@ -697,7 +697,7 @@
             goto Exit;
         }
     }
-    while ( __atomic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
+    while ( __bionic_cmpxchg( flags, flags | PTHREAD_ATTR_FLAG_DETACHED,
                               (volatile int*)&thread->attr.flags ) != 0 );
 Exit:
     pthread_mutex_unlock(&gThreadListLock);
@@ -746,12 +746,6 @@
 }
 
 
-// mutex lock states
-//
-// 0: unlocked
-// 1: locked, no waiters
-// 2: locked, maybe waiters
-
 /* a mutex is implemented as a 32-bit integer holding the following fields
  *
  * bits:     name     description
@@ -762,18 +756,146 @@
  * 1-0       state    lock state (0, 1 or 2)
  */
 
+/* Convenience macro, creates a mask of 'bits' bits that starts from
+ * the 'shift'-th least significant bit in a 32-bit word.
+ *
+ * Examples: FIELD_MASK(0,4)  -> 0xf
+ *           FIELD_MASK(16,9) -> 0x1ff0000
+ */
+#define  FIELD_MASK(shift,bits)           (((1 << (bits))-1) << (shift))
 
-#define  MUTEX_OWNER(m)  (((m)->value >> 16) & 0xffff)
-#define  MUTEX_COUNTER(m) (((m)->value >> 2) & 0xfff)
+/* This one is used to create a bit pattern from a given field value */
+#define  FIELD_TO_BITS(val,shift,bits)    (((val) & ((1 << (bits))-1)) << (shift))
 
-#define  MUTEX_TYPE_MASK       0xc000
-#define  MUTEX_TYPE_NORMAL     0x0000
-#define  MUTEX_TYPE_RECURSIVE  0x4000
-#define  MUTEX_TYPE_ERRORCHECK 0x8000
+/* And this one does the opposite, i.e. extract a field's value from a bit pattern */
+#define  FIELD_FROM_BITS(val,shift,bits)  (((val) >> (shift)) & ((1 << (bits))-1))
 
-#define  MUTEX_COUNTER_SHIFT  2
-#define  MUTEX_COUNTER_MASK   0x1ffc
-#define  MUTEX_SHARED_MASK    0x2000
+/* Mutex state:
+ *
+ * 0 for unlocked
+ * 1 for locked, no waiters
+ * 2 for locked, maybe waiters
+ */
+#define  MUTEX_STATE_SHIFT      0
+#define  MUTEX_STATE_LEN        2
+
+#define  MUTEX_STATE_MASK           FIELD_MASK(MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
+#define  MUTEX_STATE_FROM_BITS(v)   FIELD_FROM_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
+#define  MUTEX_STATE_TO_BITS(v)     FIELD_TO_BITS(v, MUTEX_STATE_SHIFT, MUTEX_STATE_LEN)
+
+#define  MUTEX_STATE_UNLOCKED            0   /* must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
+#define  MUTEX_STATE_LOCKED_UNCONTENDED  1   /* must be 1 due to atomic dec in unlock operation */
+#define  MUTEX_STATE_LOCKED_CONTENDED    2   /* must be 1 + LOCKED_UNCONTENDED due to atomic dec */
+
+#define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
+#define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
+#define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)
+
+/* return true iff the mutex is locked with no waiters */
+#define  MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(v)  (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_UNCONTENDED)
+
+/* return true iff the mutex is locked with maybe waiters */
+#define  MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(v)   (((v) & MUTEX_STATE_MASK) == MUTEX_STATE_BITS_LOCKED_CONTENDED)
+
+/* used to flip from LOCKED_UNCONTENDED to LOCKED_CONTENDED */
+#define  MUTEX_STATE_BITS_FLIP_CONTENTION(v)      ((v) ^ (MUTEX_STATE_BITS_LOCKED_CONTENDED ^ MUTEX_STATE_BITS_LOCKED_UNCONTENDED))
+
+/* Mutex counter:
+ *
+ * We need to check for overflow before incrementing, and we also need to
+ * detect when the counter is 0
+ */
+#define  MUTEX_COUNTER_SHIFT         2
+#define  MUTEX_COUNTER_LEN           11
+#define  MUTEX_COUNTER_MASK          FIELD_MASK(MUTEX_COUNTER_SHIFT, MUTEX_COUNTER_LEN)
+
+#define  MUTEX_COUNTER_BITS_WILL_OVERFLOW(v)    (((v) & MUTEX_COUNTER_MASK) == MUTEX_COUNTER_MASK)
+#define  MUTEX_COUNTER_BITS_IS_ZERO(v)          (((v) & MUTEX_COUNTER_MASK) == 0)
+
+/* Used to increment the counter directly after overflow has been checked */
+#define  MUTEX_COUNTER_BITS_ONE      FIELD_TO_BITS(1,MUTEX_COUNTER_SHIFT,MUTEX_COUNTER_LEN)
+
+/* Mutex shared bit flag
+ *
+ * This flag is set to indicate that the mutex is shared among processes.
+ * This changes the futex opcode we use for futex wait/wake operations
+ * (non-shared operations are much faster).
+ */
+#define  MUTEX_SHARED_SHIFT    13
+#define  MUTEX_SHARED_MASK     FIELD_MASK(MUTEX_SHARED_SHIFT,1)
+
+/* Mutex type:
+ *
+ * We support normal, recursive and errorcheck mutexes.
+ *
+ * The constants defined here *cannot* be changed because they must match
+ * the C library ABI which defines the following initialization values in
+ * <pthread.h>:
+ *
+ *   __PTHREAD_MUTEX_INIT_VALUE
+ *   __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE
+ *   __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE
+ */
+#define  MUTEX_TYPE_SHIFT      14
+#define  MUTEX_TYPE_LEN        2
+#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN)
+
+#define  MUTEX_TYPE_NORMAL          0  /* Must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
+#define  MUTEX_TYPE_RECURSIVE       1
+#define  MUTEX_TYPE_ERRORCHECK      2
+
+#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)
+
+#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(MUTEX_TYPE_NORMAL)
+#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(MUTEX_TYPE_RECURSIVE)
+#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(MUTEX_TYPE_ERRORCHECK)
+
+/* Mutex owner field:
+ *
+ * This is only used for recursive and errorcheck mutexes. It holds the
+ * kernel TID of the owning thread. Note that this works because the Linux
+ * kernel _only_ uses 16-bit values for thread ids.
+ *
+ * More specifically, it will wrap to 10000 when it reaches over 32768 for
+ * application processes. You can check this by running the following inside
+ * an adb shell session:
+ *
+    OLDPID=$$;
+    while true; do
+    NEWPID=$(sh -c 'echo $$')
+    if [ "$NEWPID" -gt 32768 ]; then
+        echo "AARGH: new PID $NEWPID is too high!"
+        exit 1
+    fi
+    if [ "$NEWPID" -lt "$OLDPID" ]; then
+        echo "****** Wrapping from PID $OLDPID to $NEWPID. *******"
+    else
+        echo -n "$NEWPID!"
+    fi
+    OLDPID=$NEWPID
+    done
+
+ * Note that you can run the same example on a desktop Linux system;
+ * the wrapping will also happen at 32768, but it will go back to 300 instead.
+ */
+#define  MUTEX_OWNER_SHIFT     16
+#define  MUTEX_OWNER_LEN       16
+
+#define  MUTEX_OWNER_FROM_BITS(v)    FIELD_FROM_BITS(v,MUTEX_OWNER_SHIFT,MUTEX_OWNER_LEN)
+#define  MUTEX_OWNER_TO_BITS(v)      FIELD_TO_BITS(v,MUTEX_OWNER_SHIFT,MUTEX_OWNER_LEN)
+
 
 /* a mutex attribute holds the following fields
  *
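Putting the field macros above together, a hedged illustration of how a mutex word is composed; this mirrors what pthread_mutex_lock_impl (later in this patch) stores when it acquires a free recursive mutex. The helper is illustrative, not part of the patch:

    /* 'tid' is the owner's kernel thread id; 'shared' is either 0 or
     * MUTEX_SHARED_MASK. Counter bits start at zero. */
    static int make_locked_recursive(int tid, int shared) {
        return MUTEX_OWNER_TO_BITS(tid)
             | MUTEX_TYPE_BITS_RECURSIVE
             | shared
             | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
    }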
@@ -872,7 +994,7 @@
         return EINVAL;
 
     if (__likely(attr == NULL)) {
-        mutex->value = MUTEX_TYPE_NORMAL;
+        mutex->value = MUTEX_TYPE_BITS_NORMAL;
         return 0;
     }
 
@@ -881,13 +1003,13 @@
 
     switch (*attr & MUTEXATTR_TYPE_MASK) {
     case PTHREAD_MUTEX_NORMAL:
-        value |= MUTEX_TYPE_NORMAL;
+        value |= MUTEX_TYPE_BITS_NORMAL;
         break;
     case PTHREAD_MUTEX_RECURSIVE:
-        value |= MUTEX_TYPE_RECURSIVE;
+        value |= MUTEX_TYPE_BITS_RECURSIVE;
         break;
     case PTHREAD_MUTEX_ERRORCHECK:
-        value |= MUTEX_TYPE_ERRORCHECK;
+        value |= MUTEX_TYPE_BITS_ERRORCHECK;
         break;
     default:
         return EINVAL;
@@ -897,20 +1019,6 @@
     return 0;
 }
 
-int pthread_mutex_destroy(pthread_mutex_t *mutex)
-{
-    int ret;
-
-    /* use trylock to ensure that the mutex value is
-     * valid and is not already locked. */
-    ret = pthread_mutex_trylock(mutex);
-    if (ret != 0)
-        return ret;
-
-    mutex->value = 0xdead10cc;
-    return 0;
-}
-
 
 /*
  * Lock a non-recursive mutex.
@@ -925,23 +1033,25 @@
  * the lock state field.
  */
 static __inline__ void
-_normal_lock(pthread_mutex_t*  mutex)
+_normal_lock(pthread_mutex_t*  mutex, int shared)
 {
-    /* We need to preserve the shared flag during operations */
-    int  shared = mutex->value & MUTEX_SHARED_MASK;
+    /* convenience shortcuts */
+    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
+    const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
     /*
      * The common case is an unlocked mutex, so we begin by trying to
-     * change the lock's state from 0 to 1.  __atomic_cmpxchg() returns 0
-     * if it made the swap successfully.  If the result is nonzero, this
-     * lock is already held by another thread.
+     * change the lock's state from 0 (UNLOCKED) to 1 (LOCKED).
+     * __bionic_cmpxchg() returns 0 if it made the swap successfully.
+     * If the result is nonzero, this lock is already held by another thread.
      */
-    if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value ) != 0) {
+    if (__bionic_cmpxchg(unlocked, locked_uncontended, &mutex->value) != 0) {
+        const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
         /*
          * We want to go to sleep until the mutex is available, which
-         * requires promoting it to state 2.  We need to swap in the new
-         * state value and then wait until somebody wakes us up.
+         * requires promoting it to state 2 (CONTENDED). We need to
+         * swap in the new state value and then wait until somebody wakes us up.
          *
-         * __atomic_swap() returns the previous value.  We swap 2 in and
+         * __bionic_swap() returns the previous value.  We swap 2 in and
          * see if we got zero back; if so, we have acquired the lock.  If
          * not, another thread still holds the lock and we wait again.
          *
@@ -952,8 +1062,8 @@
          * that the mutex is in state 2 when we go to sleep on it, which
          * guarantees a wake-up call.
          */
-        while (__atomic_swap(shared|2, &mutex->value ) != (shared|0))
-            __futex_wait_ex(&mutex->value, shared, shared|2, 0);
+        while (__bionic_swap(locked_contended, &mutex->value) != unlocked)
+            __futex_wait_ex(&mutex->value, shared, locked_contended, 0);
     }
     ANDROID_MEMBAR_FULL();
 }
@@ -963,19 +1073,16 @@
  * that we are in fact the owner of this lock.
  */
 static __inline__ void
-_normal_unlock(pthread_mutex_t*  mutex)
+_normal_unlock(pthread_mutex_t*  mutex, int shared)
 {
     ANDROID_MEMBAR_FULL();
 
-    /* We need to preserve the shared flag during operations */
-    int  shared = mutex->value & MUTEX_SHARED_MASK;
-
     /*
      * The mutex state will be 1 or (rarely) 2.  We use an atomic decrement
-     * to release the lock.  __atomic_dec() returns the previous value;
+     * to release the lock.  __bionic_atomic_dec() returns the previous value;
      * if it wasn't 1 we have to do some additional work.
      */
-    if (__atomic_dec(&mutex->value) != (shared|1)) {
+    if (__bionic_atomic_dec(&mutex->value) != (shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED)) {
         /*
          * Start by releasing the lock.  The decrement changed it from
          * "contended lock" to "uncontended lock", which means we still
@@ -1017,153 +1124,232 @@
     }
 }
 
-static pthread_mutex_t  __recursive_lock = PTHREAD_MUTEX_INITIALIZER;
-
-static void
-_recursive_lock(void)
+/* This common inlined function is used to increment the counter of an
+ * errorcheck or recursive mutex.
+ *
+ * For errorcheck mutexes, it will return EDEADLK.
+ * If the counter overflows, it will return EAGAIN.
+ * Otherwise, it atomically increments the counter and returns 0
+ * after providing an acquire barrier.
+ *
+ * mtype is the current mutex type,
+ * mvalue is the current mutex value (already loaded), and
+ * mutex points to the mutex.
+ */
+static __inline__ __attribute__((always_inline)) int
+_recursive_increment(pthread_mutex_t* mutex, int mvalue, int mtype)
 {
-    _normal_lock(&__recursive_lock);
+    if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
+        /* trying to re-lock a mutex we already acquired */
+        return EDEADLK;
+    }
+
+    /* Detect recursive lock overflow and return EAGAIN.
+     * This is safe because only the owner thread can modify the
+     * counter bits in the mutex value.
+     */
+    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(mvalue)) {
+        return EAGAIN;
+    }
+
+    /* We own the mutex, but other threads are able to change
+     * the lower bits (e.g. promoting it to "contended"), so we
+     * need to use an atomic cmpxchg loop to update the counter.
+     */
+    for (;;) {
+        /* increment counter, overflow was already checked */
+        int newval = mvalue + MUTEX_COUNTER_BITS_ONE;
+        if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
+            /* mutex is still locked, no need for a memory barrier */
+            return 0;
+        }
+        /* the value was changed; this happens when another thread changes
+         * the lower state bits from 1 to 2 to indicate contention. This
+         * cannot change the counter, so simply reload and try again.
+         */
+        mvalue = mutex->value;
+    }
 }
 
-static void
-_recursive_unlock(void)
+__LIBC_HIDDEN__
+int pthread_mutex_lock_impl(pthread_mutex_t *mutex)
 {
-    _normal_unlock(&__recursive_lock );
+    int mvalue, mtype, tid, new_lock_type, shared;
+
+    if (__unlikely(mutex == NULL))
+        return EINVAL;
+
+    mvalue = mutex->value;
+    mtype = (mvalue & MUTEX_TYPE_MASK);
+    shared = (mvalue & MUTEX_SHARED_MASK);
+
+    /* Handle normal case first */
+    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) ) {
+        _normal_lock(mutex, shared);
+        return 0;
+    }
+
+    /* Do we already own this recursive or error-check mutex ? */
+    tid = __get_thread()->kernel_id;
+    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
+        return _recursive_increment(mutex, mvalue, mtype);
+
+    /* Add in shared state to avoid extra 'or' operations below */
+    mtype |= shared;
+
+    /* First, if the mutex is unlocked, try to quickly acquire it.
+     * In the optimistic case where this works, set the state to 1 to
+     * indicate locked with no contention */
+    if (mvalue == mtype) {
+        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+        if (__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0) {
+            ANDROID_MEMBAR_FULL();
+            return 0;
+        }
+        /* argh, the value changed, reload before entering the loop */
+        mvalue = mutex->value;
+    }
+
+    for (;;) {
+        int newval;
+
+        /* if the mutex is unlocked, its value should be 'mtype' and
+         * we try to acquire it by setting its owner and state atomically.
+         * NOTE: We put the state to 2 since we _know_ there is contention
+         * when we are in this loop. This ensures all waiters will be
+         * woken up when the mutex is unlocked.
+         */
+        if (mvalue == mtype) {
+            newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+            /* TODO: Change this to __bionic_cmpxchg_acquire when we
+             *        implement it to get rid of the explicit memory
+             *        barrier below.
+             */
+            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
+                mvalue = mutex->value;
+                continue;
+            }
+            ANDROID_MEMBAR_FULL();
+            return 0;
+        }
+
+        /* the mutex is already locked by another thread, if its state is 1
+         * we will change it to 2 to indicate contention. */
+        if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
+            newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue); /* locked state 1 => state 2 */
+            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
+                mvalue = mutex->value;
+                continue;
+            }
+            mvalue = newval;
+        }
+
+        /* wait until the mutex is unlocked */
+        __futex_wait_ex(&mutex->value, shared, mvalue, NULL);
+
+        mvalue = mutex->value;
+    }
+    /* NOTREACHED */
 }
 
 int pthread_mutex_lock(pthread_mutex_t *mutex)
 {
-    int mtype, tid, new_lock_type, shared;
-
-    if (__unlikely(mutex == NULL))
-        return EINVAL;
-
-    mtype = (mutex->value & MUTEX_TYPE_MASK);
-    shared = (mutex->value & MUTEX_SHARED_MASK);
-
-    /* Handle normal case first */
-    if ( __likely(mtype == MUTEX_TYPE_NORMAL) ) {
-        _normal_lock(mutex);
-        return 0;
-    }
-
-    /* Do we already own this recursive or error-check mutex ? */
-    tid = __get_thread()->kernel_id;
-    if ( tid == MUTEX_OWNER(mutex) )
-    {
-        int  oldv, counter;
-
-        if (mtype == MUTEX_TYPE_ERRORCHECK) {
-            /* trying to re-lock a mutex we already acquired */
-            return EDEADLK;
+    int err = pthread_mutex_lock_impl(mutex);
+#ifdef PTHREAD_DEBUG
+    if (PTHREAD_DEBUG_ENABLED) {
+        if (!err) {
+            pthread_debug_mutex_lock_check(mutex);
         }
-        /*
-         * We own the mutex, but other threads are able to change
-         * the contents (e.g. promoting it to "contended"), so we
-         * need to hold the global lock.
-         */
-        _recursive_lock();
-        oldv         = mutex->value;
-        counter      = (oldv + (1 << MUTEX_COUNTER_SHIFT)) & MUTEX_COUNTER_MASK;
-        mutex->value = (oldv & ~MUTEX_COUNTER_MASK) | counter;
-        _recursive_unlock();
-        return 0;
     }
-
-    /* We don't own the mutex, so try to get it.
-     *
-     * First, we try to change its state from 0 to 1, if this
-     * doesn't work, try to change it to state 2.
-     */
-    new_lock_type = 1;
-
-    /* compute futex wait opcode and restore shared flag in mtype */
-    mtype |= shared;
-
-    for (;;) {
-        int  oldv;
-
-        _recursive_lock();
-        oldv = mutex->value;
-        if (oldv == mtype) { /* uncontended released lock => 1 or 2 */
-            mutex->value = ((tid << 16) | mtype | new_lock_type);
-        } else if ((oldv & 3) == 1) { /* locked state 1 => state 2 */
-            oldv ^= 3;
-            mutex->value = oldv;
-        }
-        _recursive_unlock();
-
-        if (oldv == mtype)
-            break;
-
-        /*
-         * The lock was held, possibly contended by others.  From
-         * now on, if we manage to acquire the lock, we have to
-         * assume that others are still contending for it so that
-         * we'll wake them when we unlock it.
-         */
-        new_lock_type = 2;
-
-        __futex_wait_ex(&mutex->value, shared, oldv, NULL);
-    }
-    return 0;
+#endif
+    return err;
 }
 
-
-int pthread_mutex_unlock(pthread_mutex_t *mutex)
+__LIBC_HIDDEN__
+int pthread_mutex_unlock_impl(pthread_mutex_t *mutex)
 {
-    int mtype, tid, oldv, shared;
+    int mvalue, mtype, tid, oldv, shared;
 
     if (__unlikely(mutex == NULL))
         return EINVAL;
 
-    mtype  = (mutex->value & MUTEX_TYPE_MASK);
-    shared = (mutex->value & MUTEX_SHARED_MASK);
+    mvalue = mutex->value;
+    mtype  = (mvalue & MUTEX_TYPE_MASK);
+    shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle common case first */
-    if (__likely(mtype == MUTEX_TYPE_NORMAL)) {
-        _normal_unlock(mutex);
+    if (__likely(mtype == MUTEX_TYPE_BITS_NORMAL)) {
+        _normal_unlock(mutex, shared);
         return 0;
     }
 
     /* Do we already own this recursive or error-check mutex ? */
     tid = __get_thread()->kernel_id;
-    if ( tid != MUTEX_OWNER(mutex) )
+    if ( tid != MUTEX_OWNER_FROM_BITS(mvalue) )
         return EPERM;
 
-    /* We do, decrement counter or release the mutex if it is 0 */
-    _recursive_lock();
-    oldv = mutex->value;
-    if (oldv & MUTEX_COUNTER_MASK) {
-        mutex->value = oldv - (1 << MUTEX_COUNTER_SHIFT);
-        oldv = 0;
-    } else {
-        mutex->value = shared | mtype;
+    /* If the counter is > 0, we can simply decrement it atomically.
+     * Since other threads can mutate the lower state bits (and only the
+     * lower state bits), use a cmpxchg to do it.
+     */
+    if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {
+        for (;;) {
+            int newval = mvalue - MUTEX_COUNTER_BITS_ONE;
+            if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
+                /* success: we still own the mutex, so no memory barrier */
+                return 0;
+            }
+            /* the value changed, so reload and loop */
+            mvalue = mutex->value;
+        }
     }
-    _recursive_unlock();
+
+    /* the counter is 0, so we're going to unlock the mutex by resetting
+     * its value to 'unlocked'. We need to perform a swap in order
+     * to read the current state, which will be 2 if there are waiters
+     * to wake.
+     *
+     * TODO: Change this to __bionic_swap_release when we implement it
+     *        to get rid of the explicit memory barrier below.
+     */
+    ANDROID_MEMBAR_FULL();  /* RELEASE BARRIER */
+    mvalue = __bionic_swap(mtype | shared | MUTEX_STATE_BITS_UNLOCKED, &mutex->value);
 
     /* Wake one waiting thread, if any */
-    if ((oldv & 3) == 2) {
+    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
         __futex_wake_ex(&mutex->value, shared, 1);
     }
     return 0;
 }
 
-
-int pthread_mutex_trylock(pthread_mutex_t *mutex)
+int pthread_mutex_unlock(pthread_mutex_t *mutex)
 {
-    int mtype, tid, oldv, shared;
+#ifdef PTHREAD_DEBUG
+    if (PTHREAD_DEBUG_ENABLED) {
+        pthread_debug_mutex_unlock_check(mutex);
+    }
+#endif
+    return pthread_mutex_unlock_impl(mutex);
+}
+
+__LIBC_HIDDEN__
+int pthread_mutex_trylock_impl(pthread_mutex_t *mutex)
+{
+    int mvalue, mtype, tid, oldv, shared;
 
     if (__unlikely(mutex == NULL))
         return EINVAL;
 
-    mtype  = (mutex->value & MUTEX_TYPE_MASK);
-    shared = (mutex->value & MUTEX_SHARED_MASK);
+    mvalue = mutex->value;
+    mtype  = (mvalue & MUTEX_TYPE_MASK);
+    shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle common case first */
-    if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
+    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
     {
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+        if (__bionic_cmpxchg(shared|MUTEX_STATE_BITS_UNLOCKED,
+                             shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED,
+                             &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
@@ -1173,39 +1359,36 @@
 
     /* Do we already own this recursive or error-check mutex ? */
     tid = __get_thread()->kernel_id;
-    if ( tid == MUTEX_OWNER(mutex) )
-    {
-        int counter;
+    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
+        return _recursive_increment(mutex, mvalue, mtype);
 
-        if (mtype == MUTEX_TYPE_ERRORCHECK) {
-            /* already locked by ourselves */
-            return EDEADLK;
-        }
+    /* Same as pthread_mutex_lock, except that we don't want to wait, and
+     * the only operation that can succeed is a single cmpxchg to acquire the
+     * lock if it is released / not owned by anyone. No need for a complex loop.
+     */
+    mtype |= shared | MUTEX_STATE_BITS_UNLOCKED;
+    mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
 
-        _recursive_lock();
-        oldv = mutex->value;
-        counter = (oldv + (1 << MUTEX_COUNTER_SHIFT)) & MUTEX_COUNTER_MASK;
-        mutex->value = (oldv & ~MUTEX_COUNTER_MASK) | counter;
-        _recursive_unlock();
+    if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
+        ANDROID_MEMBAR_FULL();
         return 0;
     }
 
-    /* Restore sharing bit in mtype */
-    mtype |= shared;
-
-    /* Try to lock it, just once. */
-    _recursive_lock();
-    oldv = mutex->value;
-    if (oldv == mtype)  /* uncontended released lock => state 1 */
-        mutex->value = ((tid << 16) | mtype | 1);
-    _recursive_unlock();
-
-    if (oldv != mtype)
-        return EBUSY;
-
-    return 0;
+    return EBUSY;
 }
 
+int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+    int err = pthread_mutex_trylock_impl(mutex);
+#ifdef PTHREAD_DEBUG
+    if (PTHREAD_DEBUG_ENABLED) {
+        if (!err) {
+            pthread_debug_mutex_lock_check(mutex);
+        }
+    }
+#endif
+    return err;
+}
 
 /* initialize 'ts' with the difference between 'abstime' and the current time
  * according to 'clock'. Returns -1 if abstime already expired, or 0 otherwise.
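For context, the timed-lock path below first converts the caller's relative timeout into an absolute deadline via __timespec_to_relative_msec, then repeatedly shrinks it back to a remaining wait with __timespec_to_absolute. A hedged sketch of the first conversion, with illustrative names:

    #include <time.h>

    static void deadline_from_msec(struct timespec *abstime, unsigned msecs) {
        clock_gettime(CLOCK_MONOTONIC, abstime);
        abstime->tv_sec  += msecs / 1000;
        abstime->tv_nsec += (msecs % 1000) * 1000000;
        if (abstime->tv_nsec >= 1000000000) {  /* carry into seconds */
            abstime->tv_sec  += 1;
            abstime->tv_nsec -= 1000000000;
        }
    }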
@@ -1241,12 +1424,13 @@
     }
 }
 
-int pthread_mutex_lock_timeout_np(pthread_mutex_t *mutex, unsigned msecs)
+__LIBC_HIDDEN__
+int pthread_mutex_lock_timeout_np_impl(pthread_mutex_t *mutex, unsigned msecs)
 {
     clockid_t        clock = CLOCK_MONOTONIC;
     struct timespec  abstime;
     struct timespec  ts;
-    int              mtype, tid, oldv, new_lock_type, shared;
+    int              mvalue, mtype, tid, oldv, new_lock_type, shared;
 
     /* compute absolute expiration time */
     __timespec_to_relative_msec(&abstime, msecs, clock);
@@ -1254,24 +1438,29 @@
     if (__unlikely(mutex == NULL))
         return EINVAL;
 
-    mtype  = (mutex->value & MUTEX_TYPE_MASK);
-    shared = (mutex->value & MUTEX_SHARED_MASK);
+    mvalue = mutex->value;
+    mtype  = (mvalue & MUTEX_TYPE_MASK);
+    shared = (mvalue & MUTEX_SHARED_MASK);
 
     /* Handle common case first */
-    if ( __likely(mtype == MUTEX_TYPE_NORMAL) )
+    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
     {
-        /* fast path for uncontended lock */
-        if (__atomic_cmpxchg(shared|0, shared|1, &mutex->value) == 0) {
+        const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
+        const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+        const int locked_contended   = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+
+        /* fast path for uncontended lock. Note: MUTEX_TYPE_BITS_NORMAL is 0 */
+        if (__bionic_cmpxchg(unlocked, locked_uncontended, &mutex->value) == 0) {
             ANDROID_MEMBAR_FULL();
             return 0;
         }
 
         /* loop while needed */
-        while (__atomic_swap(shared|2, &mutex->value) != (shared|0)) {
+        while (__bionic_swap(locked_contended, &mutex->value) != unlocked) {
             if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
                 return EBUSY;
 
-            __futex_wait_ex(&mutex->value, shared, shared|2, &ts);
+            __futex_wait_ex(&mutex->value, shared, locked_contended, &ts);
         }
         ANDROID_MEMBAR_FULL();
         return 0;
@@ -1279,66 +1468,106 @@
 
     /* Do we already own this recursive or error-check mutex ? */
     tid = __get_thread()->kernel_id;
-    if ( tid == MUTEX_OWNER(mutex) )
-    {
-        int  oldv, counter;
+    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
+        return _recursive_increment(mutex, mvalue, mtype);
 
-        if (mtype == MUTEX_TYPE_ERRORCHECK) {
-            /* already locked by ourselves */
-            return EDEADLK;
+    /* the following implements the same loop as pthread_mutex_lock_impl
+     * but adds checks to ensure that the operation never exceeds the
+     * absolute expiration time.
+     */
+    mtype |= shared;
+
+    /* first try a quick lock */
+    if (mvalue == mtype) {
+        mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
+        if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
+            ANDROID_MEMBAR_FULL();
+            return 0;
         }
-
-        _recursive_lock();
-        oldv = mutex->value;
-        counter = (oldv + (1 << MUTEX_COUNTER_SHIFT)) & MUTEX_COUNTER_MASK;
-        mutex->value = (oldv & ~MUTEX_COUNTER_MASK) | counter;
-        _recursive_unlock();
-        return 0;
+        mvalue = mutex->value;
     }
 
-    /* We don't own the mutex, so try to get it.
-     *
-     * First, we try to change its state from 0 to 1, if this
-     * doesn't work, try to change it to state 2.
-     */
-    new_lock_type = 1;
-
-    /* Compute wait op and restore sharing bit in mtype */
-    mtype  |= shared;
-
     for (;;) {
-        int  oldv;
-        struct timespec  ts;
+        struct timespec ts;
 
-        _recursive_lock();
-        oldv = mutex->value;
-        if (oldv == mtype) { /* uncontended released lock => 1 or 2 */
-            mutex->value = ((tid << 16) | mtype | new_lock_type);
-        } else if ((oldv & 3) == 1) { /* locked state 1 => state 2 */
-            oldv ^= 3;
-            mutex->value = oldv;
+        /* if the value is 'unlocked', try to acquire it directly */
+        /* NOTE: set the state to 2 since we know there is contention */
+        if (mvalue == mtype) /* unlocked */ {
+            mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
+            if (__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0) {
+                ANDROID_MEMBAR_FULL();
+                return 0;
+            }
+            /* the value changed before we could lock it. We need to check
+             * the time to avoid livelocks, reload the value, then loop again. */
+            if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
+                return EBUSY;
+
+            mvalue = mutex->value;
+            continue;
         }
-        _recursive_unlock();
 
-        if (oldv == mtype)
-            break;
+        /* The value is locked. If 'uncontended', try to switch its state
+         * to 'contended' to ensure we get woken up later. */
+        if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
+            int newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue);
+            if (__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0) {
+                /* this failed because the value changed, reload it */
+                mvalue = mutex->value;
+            } else {
+                /* this succeeded, update mvalue */
+                mvalue = newval;
+            }
+        }
 
-        /*
-         * The lock was held, possibly contended by others.  From
-         * now on, if we manage to acquire the lock, we have to
-         * assume that others are still contending for it so that
-         * we'll wake them when we unlock it.
-         */
-        new_lock_type = 2;
-
+        /* check time and update 'ts' */
         if (__timespec_to_absolute(&ts, &abstime, clock) < 0)
             return EBUSY;
 
-        __futex_wait_ex(&mutex->value, shared, oldv, &ts);
+        /* Only wait to be woken up if the state is '2', otherwise we'll
+         * simply loop right now. This can happen when the second cmpxchg
+         * in our loop failed because the mutex was unlocked by another
+         * thread.
+         */
+        if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
+            if (__futex_wait_ex(&mutex->value, shared, mvalue, &ts) == ETIMEDOUT) {
+                return EBUSY;
+            }
+            mvalue = mutex->value;
+        }
     }
+    /* NOTREACHED */
+}
+
+int pthread_mutex_lock_timeout_np(pthread_mutex_t *mutex, unsigned msecs)
+{
+    int err = pthread_mutex_lock_timeout_np_impl(mutex, msecs);
+#ifdef PTHREAD_DEBUG
+    if (PTHREAD_DEBUG_ENABLED) {
+        if (!err) {
+            pthread_debug_mutex_lock_check(mutex);
+        }
+    }
+#endif
+    return err;
+}
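+
+/* Illustrative sketch (not part of the original patch): the non-portable
+ * timed lock above returns EBUSY (not ETIMEDOUT) once 'msecs' elapse under
+ * contention:
+ *
+ *     static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
+ *
+ *     int do_timed_work(void) {
+ *         if (pthread_mutex_lock_timeout_np(&m, 100) != 0)
+ *             return -1;   // EBUSY: not acquired within ~100 ms
+ *         // ... critical section ...
+ *         return pthread_mutex_unlock(&m);
+ *     }
+ */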
+
+int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+    int ret;
+
+    /* use trylock to ensure that the mutex value is
+     * valid and is not already locked. */
+    ret = pthread_mutex_trylock_impl(mutex);
+    if (ret != 0)
+        return ret;
+
+    mutex->value = 0xdead10cc;
     return 0;
 }
 
+
+
 int pthread_condattr_init(pthread_condattr_t *attr)
 {
     if (attr == NULL)
@@ -1436,7 +1665,7 @@
         long oldval = cond->value;
         long newval = ((oldval - COND_COUNTER_INCREMENT) & COND_COUNTER_MASK)
                       | flags;
-        if (__atomic_cmpxchg(oldval, newval, &cond->value) == 0)
+        if (__bionic_cmpxchg(oldval, newval, &cond->value) == 0)
             break;
     }
 
@@ -1844,7 +2073,7 @@
 }
 
 // man says this should be in <linux/unistd.h>, but it isn't
-extern int tkill(int tid, int sig);
+extern int tgkill(int tgid, int tid, int sig);
 
 int pthread_kill(pthread_t tid, int sig)
 {
@@ -1852,7 +2081,7 @@
     int  old_errno = errno;
     pthread_internal_t * thread = (pthread_internal_t *)tid;
 
-    ret = tkill(thread->kernel_id, sig);
+    ret = tgkill(getpid(), thread->kernel_id, sig);
     if (ret < 0) {
         ret = errno;
         errno = old_errno;
@@ -1894,7 +2123,7 @@
     /* 'in_set_ptr' is the second parameter to __rt_sigprocmask. It must be NULL
      * if 'set' is NULL to ensure correct semantics (which in this case would
      * be to ignore 'how' and return the current signal set into 'oset'.
-      */
+     */
     if (set == NULL) {
         in_set_ptr = NULL;
     } else {
@@ -1934,18 +2163,70 @@
 {
     static pthread_mutex_t   once_lock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
     volatile pthread_once_t* ocptr = once_control;
 
-    pthread_once_t tmp = *ocptr;
-    ANDROID_MEMBAR_FULL();
-    if (tmp == PTHREAD_ONCE_INIT) {
-        pthread_mutex_lock( &once_lock );
-        if (*ocptr == PTHREAD_ONCE_INIT) {
-            (*init_routine)();
-            ANDROID_MEMBAR_FULL();
-            *ocptr = ~PTHREAD_ONCE_INIT;
-        }
-        pthread_mutex_unlock( &once_lock );
+    /* PTHREAD_ONCE_INIT is 0, we use the following bit flags
+     *
+     *   bit 0 set  -> initialization is under way
+     *   bit 1 set  -> initialization is complete
+     */
+#define ONCE_INITIALIZING           (1 << 0)
+#define ONCE_COMPLETED              (1 << 1)
+
+    /* First check if the once is already initialized. This will be the common
+     * case and we want to make this as fast as possible. Note that this still
+     * requires a load_acquire operation here to ensure that all the
+     * stores performed by the initialization function are observable on
+     * this CPU after we exit.
+     */
+    if (__likely((*ocptr & ONCE_COMPLETED) != 0)) {
+        ANDROID_MEMBAR_FULL();
+        return 0;
     }
+
+    for (;;) {
+        /* Try to atomically set the INITIALIZING flag.
+         * This requires a cmpxchg loop, and we may need
+         * to exit prematurely if we detect that COMPLETED
+         * is now set.
+         */
+        int32_t  oldval, newval;
+
+        do {
+            oldval = *ocptr;
+            if ((oldval & ONCE_COMPLETED) != 0)
+                break;
+
+            newval = oldval | ONCE_INITIALIZING;
+        } while (__bionic_cmpxchg(oldval, newval, ocptr) != 0);
+
+        if ((oldval & ONCE_COMPLETED) != 0) {
+            /* We detected that COMPLETED was set while in our loop */
+            ANDROID_MEMBAR_FULL();
+            return 0;
+        }
+
+        if ((oldval & ONCE_INITIALIZING) == 0) {
+            /* We got there first, we can jump out of the loop to
+             * handle the initialization */
+            break;
+        }
+
+        /* Another thread is running the initialization and hasn't completed
+         * yet, so wait for it, then try again. */
+        __futex_wait_ex(ocptr, 0, oldval, NULL);
+    }
+
+    /* call the initialization function. */
+    (*init_routine)();
+
+    /* Do a store_release indicating that initialization is complete */
+    ANDROID_MEMBAR_FULL();
+    *ocptr = ONCE_COMPLETED;
+
+    /* Wake up any waiters, if any */
+    __futex_wake_ex(ocptr, 0, INT_MAX);
+
     return 0;
 }
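+
+/* Illustrative sketch (not part of the original patch): with the flag scheme
+ * above, the first caller runs init_routine while racing callers futex-wait
+ * on ONCE_INITIALIZING; later callers only pay the ONCE_COMPLETED fast path:
+ *
+ *     static pthread_once_t once = PTHREAD_ONCE_INIT;
+ *     static int cached_pagesize;
+ *
+ *     static void init_pagesize(void) { cached_pagesize = getpagesize(); }
+ *
+ *     int get_cached_pagesize(void) {
+ *         pthread_once(&once, init_pagesize);  // runs exactly once
+ *         return cached_pagesize;
+ *     }
+ */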
 
diff --git a/libc/bionic/pthread_debug.c b/libc/bionic/pthread_debug.c
new file mode 100644
index 0000000..7ee208c
--- /dev/null
+++ b/libc/bionic/pthread_debug.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/atomics.h>
+#include <sys/system_properties.h>
+#include <sys/mman.h>
+
+#if HAVE_DLADDR
+#include <dlfcn.h>
+#endif
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+#include <unwind.h>
+#include <unistd.h>
+
+#include "logd.h"
+#include "bionic_tls.h"
+
+/*
+ * ===========================================================================
+ *      Deadlock prediction
+ * ===========================================================================
+ */
+/*
+The idea is to predict the possibility of deadlock by recording the order
+in which locks are acquired.  If we see an attempt to acquire a lock
+out of order, we can identify the locks and offending code.
+
+To make this work, we need to keep track of the locks held by each thread,
+and create history trees for each lock.  When a thread tries to acquire
+a new lock, we walk through the "history children" of the lock, looking
+for a match with locks the thread already holds.  If we find a match,
+it means the thread has made a request that could result in a deadlock.
+
+To support recursive locks, we always allow re-locking a currently-held
+lock, and maintain a recursion depth count.
+
+An ASCII-art example, where letters represent locks:
+
+        A
+       /|\
+      / | \
+     B  |  D
+      \ |
+       \|
+        C
+
+The above is the tree we'd have after handling lock synchronization
+sequences "ABC", "AC", "AD".  A has three children, {B, C, D}.  C is also
+a child of B.  (The lines represent pointers between parent and child.
+Every node can have multiple parents and multiple children.)
+
+If we hold AC, and want to lock B, we recursively search through B's
+children to see if A or C appears.  It does, so we reject the attempt.
+(A straightforward way to implement it: add a link from C to B, then
+determine whether the graph starting at B contains a cycle.)
+
+If we hold AC and want to lock D, we would succeed, creating a new link
+from C to D.
+
+Updates to MutexInfo structs are only allowed for the thread that holds
+the lock, so we actually do most of our deadlock prediction work after
+the lock has been acquired.
+*/
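+
+/* Illustrative sketch (not part of the original patch): the classic
+ * lock-order inversion this predictor flags, once enabled through the
+ * debug.libc.pthread system property (see pthread_debug_init below).
+ * After thread1 has established the order A -> B, thread2's attempt adds
+ * a B -> A link, so traverseTree() finds a cycle and logs an
+ * "Illegal lock attempt":
+ *
+ *     static pthread_mutex_t A = PTHREAD_MUTEX_INITIALIZER;
+ *     static pthread_mutex_t B = PTHREAD_MUTEX_INITIALIZER;
+ *
+ *     void* thread1(void* arg) {   // establishes order A -> B
+ *         pthread_mutex_lock(&A);
+ *         pthread_mutex_lock(&B);
+ *         pthread_mutex_unlock(&B);
+ *         pthread_mutex_unlock(&A);
+ *         return arg;
+ *     }
+ *
+ *     void* thread2(void* arg) {   // attempts order B -> A: flagged
+ *         pthread_mutex_lock(&B);
+ *         pthread_mutex_lock(&A);
+ *         pthread_mutex_unlock(&A);
+ *         pthread_mutex_unlock(&B);
+ *         return arg;
+ *     }
+ */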
+
+// =============================================================================
+// log functions
+// =============================================================================
+
+#define LOGD(format, ...)  \
+    __libc_android_log_print(ANDROID_LOG_DEBUG, \
+            "pthread_debug", (format), ##__VA_ARGS__ )
+
+#define LOGW(format, ...)  \
+    __libc_android_log_print(ANDROID_LOG_WARN, \
+            "pthread_debug", (format), ##__VA_ARGS__ )
+
+#define LOGE(format, ...)  \
+    __libc_android_log_print(ANDROID_LOG_ERROR, \
+            "pthread_debug", (format), ##__VA_ARGS__ )
+
+#define LOGI(format, ...)  \
+    __libc_android_log_print(ANDROID_LOG_INFO, \
+            "pthread_debug", (format), ##__VA_ARGS__ )
+
+static const char* const kStartBanner =
+        "===============================================================";
+
+static const char* const kEndBanner =
+        "===============================================================";
+
+extern char* __progname;
+
+// =============================================================================
+// map info functions
+// =============================================================================
+
+typedef struct mapinfo {
+    struct mapinfo *next;
+    unsigned start;
+    unsigned end;
+    char name[];
+} mapinfo;
+
+static mapinfo* sMapInfo = NULL;
+
+static mapinfo *parse_maps_line(char *line)
+{
+    mapinfo *mi;
+    int len = strlen(line);
+
+    if(len < 1) return 0;
+    line[--len] = 0;
+
+    if(len < 50) return 0;
+    if(line[20] != 'x') return 0;
+
+    mi = malloc(sizeof(mapinfo) + (len - 47));
+    if(mi == 0) return 0;
+
+    mi->start = strtoul(line, 0, 16);
+    mi->end = strtoul(line + 9, 0, 16);
+    /* To be filled in parse_elf_info if the mapped section starts with
+     * elf_header
+     */
+    mi->next = 0;
+    strcpy(mi->name, line + 49);
+
+    return mi;
+}
+
+static mapinfo *init_mapinfo(int pid)
+{
+    struct mapinfo *milist = NULL;
+    char data[1024];
+    sprintf(data, "/proc/%d/maps", pid);
+    FILE *fp = fopen(data, "r");
+    if(fp) {
+        while(fgets(data, sizeof(data), fp)) {
+            mapinfo *mi = parse_maps_line(data);
+            if(mi) {
+                mi->next = milist;
+                milist = mi;
+            }
+        }
+        fclose(fp);
+    }
+
+    return milist;
+}
+
+static void deinit_mapinfo(mapinfo *mi)
+{
+   mapinfo *del;
+   while(mi) {
+       del = mi;
+       mi = mi->next;
+       free(del);
+   }
+}
+
+/* Find the containing map info for the pc */
+static const mapinfo *pc_to_mapinfo(mapinfo *mi, unsigned pc, unsigned *rel_pc)
+{
+    *rel_pc = pc;
+    while(mi) {
+        if((pc >= mi->start) && (pc < mi->end)){
+            // Only calculate the relative offset for shared libraries
+            if (strstr(mi->name, ".so")) {
+                *rel_pc -= mi->start;
+            }
+            return mi;
+        }
+        mi = mi->next;
+    }
+    return NULL;
+}
+
+// =============================================================================
+// stack trace functions
+// =============================================================================
+
+#define STACK_TRACE_DEPTH 16
+
+typedef struct
+{
+    size_t count;
+    intptr_t* addrs;
+} stack_crawl_state_t;
+
+/* depends how the system includes define this */
+#ifdef HAVE_UNWIND_CONTEXT_STRUCT
+typedef struct _Unwind_Context __unwind_context;
+#else
+typedef _Unwind_Context __unwind_context;
+#endif
+
+static _Unwind_Reason_Code trace_function(__unwind_context *context, void *arg)
+{
+    stack_crawl_state_t* state = (stack_crawl_state_t*)arg;
+    if (state->count) {
+        intptr_t ip = (intptr_t)_Unwind_GetIP(context);
+        if (ip) {
+            state->addrs[0] = ip;
+            state->addrs++;
+            state->count--;
+            return _URC_NO_REASON;
+        }
+    }
+    /*
+     * If we run out of space to record addresses, or a 0 return address has
+     * been seen, stop unwinding the stack.
+     */
+    return _URC_END_OF_STACK;
+}
+
+static inline
+int get_backtrace(intptr_t* addrs, size_t max_entries)
+{
+    stack_crawl_state_t state;
+    state.count = max_entries;
+    state.addrs = (intptr_t*)addrs;
+    _Unwind_Backtrace(trace_function, (void*)&state);
+    return max_entries - state.count;
+}
+
+static void log_backtrace(intptr_t* addrs, size_t c)
+{
+    int index = 0;
+    size_t i;
+    for (i=0 ; i<c; i++) {
+        unsigned int relpc;
+        void* offset = 0;
+        const char* symbol = NULL;
+
+#if HAVE_DLADDR
+        Dl_info info;
+        if (dladdr((void*)addrs[i], &info)) {
+            offset = info.dli_saddr;
+            symbol = info.dli_sname;
+        }
+#endif
+
+        if (symbol || index>0 || !HAVE_DLADDR) {
+            /*
+             * This test is a bit sketchy, but it allows us to skip the
+             * stack trace entries due to this debugging code. It works
+             * because those don't have a symbol (they're not exported).
+             */
+            mapinfo const* mi = pc_to_mapinfo(sMapInfo, addrs[i], &relpc);
+            char const* soname = mi ? mi->name : NULL;
+#if HAVE_DLADDR
+            if (!soname)
+                soname = info.dli_fname;
+#endif
+            if (!soname)
+                soname = "unknown";
+
+            if (symbol) {
+                LOGW("          "
+                     "#%02d  pc %08lx  %s (%s+0x%x)",
+                     index, relpc, soname, symbol,
+                     addrs[i] - (intptr_t)offset);
+            } else {
+                LOGW("          "
+                     "#%02d  pc %08lx  %s",
+                     index, relpc, soname);
+            }
+            index++;
+        }
+    }
+}
+
+/****************************************************************************/
+
+/*
+ * level <= 0 : deadlock prediction disabled
+ * level    1 : deadlock prediction enabled, w/o call stacks
+ * level    2 : deadlock prediction enabled w/ call stacks
+ */
+#define CAPTURE_CALLSTACK 2
+static int sPthreadDebugLevel = 0;
+static pid_t sPthreadDebugDisabledThread = -1;
+static pthread_mutex_t sDbgLock = PTHREAD_MUTEX_INITIALIZER;
+
+/****************************************************************************/
+
+/* some simple/lame malloc replacement
+ * NOT thread-safe and leaks everything
+ */
+
+#define DBG_ALLOC_BLOCK_SIZE PAGESIZE
+static size_t sDbgAllocOffset = DBG_ALLOC_BLOCK_SIZE;
+static char* sDbgAllocPtr = NULL;
+
+static void* DbgAllocLocked(size_t size) {
+    if ((sDbgAllocOffset + size) > DBG_ALLOC_BLOCK_SIZE) {
+        sDbgAllocOffset = 0;
+        sDbgAllocPtr = mmap(NULL, DBG_ALLOC_BLOCK_SIZE, PROT_READ|PROT_WRITE,
+                MAP_ANON | MAP_PRIVATE, 0, 0);
+        if (sDbgAllocPtr == MAP_FAILED) {
+            return NULL;
+        }
+    }
+    void* addr = sDbgAllocPtr + sDbgAllocOffset;
+    sDbgAllocOffset += size;
+    return addr;
+}
+
+static void* debug_realloc(void *ptr, size_t size, size_t old_size) {
+    void* addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
+            MAP_ANON | MAP_PRIVATE, 0, 0);
+    if (addr != MAP_FAILED) {
+        if (ptr) {
+            memcpy(addr, ptr, old_size);
+            munmap(ptr, old_size);
+        }
+    } else {
+        addr = NULL;
+    }
+    return addr;
+}
+
+/*****************************************************************************/
+
+struct MutexInfo;
+
+typedef struct CallStack {
+    intptr_t    depth;
+    intptr_t*   addrs;
+} CallStack;
+
+typedef struct MutexInfo* MutexInfoListEntry;
+typedef struct CallStack  CallStackListEntry;
+
+typedef struct GrowingList {
+    int alloc;
+    int count;
+    union {
+        void*               data;
+        MutexInfoListEntry* list;
+        CallStackListEntry* stack;
+    };
+} GrowingList;
+
+typedef GrowingList MutexInfoList;
+typedef GrowingList CallStackList;
+
+typedef struct MutexInfo {
+    // thread currently holding the lock or 0
+    pid_t               owner;
+
+    // most-recently-locked doubly-linked list
+    struct MutexInfo*   prev;
+    struct MutexInfo*   next;
+
+    // for reentrant locks
+    int                 lockCount;
+    // when looking for loops in the graph, marks visited nodes
+    int                 historyMark;
+    // the actual mutex
+    pthread_mutex_t*    mutex;
+    // list of locks directly acquired AFTER this one in the same thread
+    MutexInfoList       children;
+    // list of locks directly acquired BEFORE this one in the same thread
+    MutexInfoList       parents;
+    // list of call stacks when a new link is established to this lock from its parent
+    CallStackList       stacks;
+    // call stack when this lock was acquired last
+    int                 stackDepth;
+    intptr_t            stackTrace[STACK_TRACE_DEPTH];
+} MutexInfo;
+
+static void growingListInit(GrowingList* list) {
+    list->alloc = 0;
+    list->count = 0;
+    list->data = NULL;
+}
+
+static void growingListAdd(GrowingList* pList, size_t objSize) {
+    if (pList->count == pList->alloc) {
+        size_t oldsize = pList->alloc * objSize;
+        pList->alloc += PAGESIZE / objSize;
+        size_t size = pList->alloc * objSize;
+        pList->data = debug_realloc(pList->data, size, oldsize);
+    }
+    pList->count++;
+}
+
+static void initMutexInfo(MutexInfo* object, pthread_mutex_t* mutex) {
+    object->owner = 0;
+    object->prev = 0;
+    object->next = 0;
+    object->lockCount = 0;
+    object->historyMark = 0;
+    object->mutex = mutex;
+    growingListInit(&object->children);
+    growingListInit(&object->parents);
+    growingListInit(&object->stacks);
+    object->stackDepth = 0;
+}
+
+typedef struct ThreadInfo {
+    pid_t       pid;
+    MutexInfo*  mrl;
+} ThreadInfo;
+
+static void initThreadInfo(ThreadInfo* object, pid_t pid) {
+    object->pid = pid;
+    object->mrl = NULL;
+}
+
+/****************************************************************************/
+
+static MutexInfo* get_mutex_info(pthread_mutex_t *mutex);
+static void mutex_lock_checked(MutexInfo* mrl, MutexInfo* object);
+static void mutex_unlock_checked(MutexInfo* object);
+
+/****************************************************************************/
+
+extern int pthread_mutex_lock_impl(pthread_mutex_t *mutex);
+extern int pthread_mutex_unlock_impl(pthread_mutex_t *mutex);
+
+static int pthread_mutex_lock_unchecked(pthread_mutex_t *mutex) {
+    return pthread_mutex_lock_impl(mutex);
+}
+
+static int pthread_mutex_unlock_unchecked(pthread_mutex_t *mutex) {
+    return pthread_mutex_unlock_impl(mutex);
+}
+
+/****************************************************************************/
+
+static void dup_backtrace(CallStack* stack, int count, intptr_t const* addrs) {
+    stack->depth = count;
+    stack->addrs = DbgAllocLocked(count * sizeof(intptr_t));
+    memcpy(stack->addrs, addrs, count * sizeof(intptr_t));
+}
+
+/****************************************************************************/
+
+static int historyListHas(
+        const MutexInfoList* list, MutexInfo const * obj) {
+    int i;
+    for (i=0; i<list->count; i++) {
+        if (list->list[i] == obj) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+static void historyListAdd(MutexInfoList* pList, MutexInfo* obj) {
+    growingListAdd(pList, sizeof(MutexInfoListEntry));
+    pList->list[pList->count - 1] = obj;
+}
+
+static int historyListRemove(MutexInfoList* pList, MutexInfo* obj) {
+    int i;
+    for (i = pList->count-1; i >= 0; i--) {
+        if (pList->list[i] == obj) {
+            break;
+        }
+    }
+    if (i < 0) {
+        // not found!
+        return 0;
+    }
+
+    if (i != pList->count-1) {
+        // copy the last entry to the new free slot
+        pList->list[i] = pList->list[pList->count-1];
+    }
+    pList->count--;
+    memset(&pList->list[pList->count], 0, sizeof(MutexInfoListEntry));
+    return 1;
+}
+
+static void linkParentToChild(MutexInfo* parent, MutexInfo* child) {
+    historyListAdd(&parent->children, child);
+    historyListAdd(&child->parents, parent);
+}
+
+static void unlinkParentFromChild(MutexInfo* parent, MutexInfo* child) {
+    historyListRemove(&parent->children, child);
+    historyListRemove(&child->parents, parent);
+}
+
+/****************************************************************************/
+
+static void callstackListAdd(CallStackList* pList,
+        int count, intptr_t const* addrs) {
+    growingListAdd(pList, sizeof(CallStackListEntry));
+    dup_backtrace(&pList->stack[pList->count - 1], count, addrs);
+}
+
+/****************************************************************************/
+
+/*
+ * Recursively traverse the object hierarchy starting at "obj".  We mark
+ * ourselves on entry and clear the mark on exit.  If we ever encounter
+ * a marked object, we have a cycle.
+ *
+ * Returns "true" if all is well, "false" if we found a cycle.
+ */
+
+static int traverseTree(MutexInfo* obj, MutexInfo const* objParent)
+{
+    /*
+     * Have we been here before?
+     */
+    if (obj->historyMark) {
+        int stackDepth;
+        intptr_t addrs[STACK_TRACE_DEPTH];
+
+        /* Turn off prediction temporarily in this thread while logging */
+        sPthreadDebugDisabledThread = gettid();
+
+        if (sMapInfo == NULL) {
+            // note: we're protected by sDbgLock
+            sMapInfo = init_mapinfo(getpid());
+        }
+
+        LOGW("%s\n", kStartBanner);
+        LOGW("pid: %d, tid: %d >>> %s <<<", getpid(), gettid(), __progname);
+        LOGW("Illegal lock attempt:\n");
+        LOGW("--- pthread_mutex_t at %p\n", obj->mutex);
+        stackDepth = get_backtrace(addrs, STACK_TRACE_DEPTH);
+        log_backtrace(addrs, stackDepth);
+
+        LOGW("+++ Currently held locks in this thread (in reverse order):");
+        MutexInfo* cur = obj;
+        pid_t ourtid = gettid();
+        int i;
+        for (i=0 ; i<cur->parents.count ; i++) {
+            MutexInfo* parent = cur->parents.list[i];
+            if (parent->owner == ourtid) {
+                LOGW("--- pthread_mutex_t at %p\n", parent->mutex);
+                if (sPthreadDebugLevel >= CAPTURE_CALLSTACK) {
+                    log_backtrace(parent->stackTrace, parent->stackDepth);
+                }
+                cur = parent;
+                break;
+            }
+        }
+
+        LOGW("+++ Earlier, the following lock order (from last to first) was established\n");
+        return 0;
+    }
+
+    obj->historyMark = 1;
+
+    MutexInfoList* pList = &obj->children;
+    int result = 1;
+    int i;
+    for (i = pList->count-1; i >= 0; i--) {
+        MutexInfo* child = pList->list[i];
+        if (!traverseTree(child, obj)) {
+            LOGW("--- pthread_mutex_t at %p\n", obj->mutex);
+            if (sPthreadDebugLevel >= CAPTURE_CALLSTACK) {
+                int index = historyListHas(&obj->parents, objParent);
+                if ((size_t)index < (size_t)obj->stacks.count) {
+                    log_backtrace(
+                            obj->stacks.stack[index].addrs,
+                            obj->stacks.stack[index].depth);
+                } else {
+                    log_backtrace(
+                            obj->stackTrace,
+                            obj->stackDepth);
+                }
+            }
+            result = 0;
+            break;
+        }
+    }
+
+    obj->historyMark = 0;
+    return result;
+}
+
+/****************************************************************************/
+
+static void mutex_lock_checked(MutexInfo* mrl, MutexInfo* object)
+{
+    pid_t tid = gettid();
+    if (object->owner == tid) {
+        object->lockCount++;
+        return;
+    }
+
+    object->owner = tid;
+    object->lockCount = 0;
+
+    if (sPthreadDebugLevel >= CAPTURE_CALLSTACK) {
+        // always record the call stack when acquiring a lock.
+        // it's not efficient, but is useful during diagnostics
+        object->stackDepth = get_backtrace(object->stackTrace, STACK_TRACE_DEPTH);
+    }
+
+    // no other locks held in this thread -- no deadlock possible!
+    if (mrl == NULL)
+        return;
+
+    // check if the lock we're trying to acquire is a direct descendant of
+    // the most recently locked mutex in this thread, in which case we're
+    // in a good situation -- no deadlock possible
+    if (historyListHas(&mrl->children, object) >= 0)
+        return;
+
+    pthread_mutex_lock_unchecked(&sDbgLock);
+
+    linkParentToChild(mrl, object);
+    if (!traverseTree(object, mrl)) {
+        deinit_mapinfo(sMapInfo);
+        sMapInfo = NULL;
+        LOGW("%s\n", kEndBanner);
+        unlinkParentFromChild(mrl, object);
+        // reenable pthread debugging for this thread
+        sPthreadDebugDisabledThread = -1;
+    } else {
+        // record the call stack for this link
+        // NOTE: the call stack is added at the same index
+        // as mrl in object->parents[]
+        // ie: object->parents.count == object->stacks.count, which is
+        // also the index.
+        if (sPthreadDebugLevel >= CAPTURE_CALLSTACK) {
+            callstackListAdd(&object->stacks,
+                    object->stackDepth, object->stackTrace);
+        }
+    }
+
+    pthread_mutex_unlock_unchecked(&sDbgLock);
+}
+
+static void mutex_unlock_checked(MutexInfo* object)
+{
+    pid_t tid = gettid();
+    if (object->owner == tid) {
+        if (object->lockCount == 0) {
+            object->owner = 0;
+        } else {
+            object->lockCount--;
+        }
+    }
+}
+
+
+// =============================================================================
+// Hash Table functions
+// =============================================================================
+
+/****************************************************************************/
+
+#define HASHTABLE_SIZE      256
+
+typedef struct HashEntry HashEntry;
+struct HashEntry {
+    size_t slot;
+    HashEntry* prev;
+    HashEntry* next;
+    void* data;
+};
+
+typedef struct HashTable HashTable;
+struct HashTable {
+    HashEntry* slots[HASHTABLE_SIZE];
+};
+
+static HashTable sMutexMap;
+static HashTable sThreadMap;
+
+/****************************************************************************/
+
+static uint32_t get_hashcode(void const * key, size_t keySize)
+{
+    uint32_t h = keySize;
+    char const* data = (char const*)key;
+    size_t i;
+    for (i = 0; i < keySize; i++) {
+        h = h * 31 + *data;
+        data++;
+    }
+    return (uint32_t)h;
+}
+
+static size_t get_index(uint32_t h)
+{
+    // We apply this secondary hashing discovered by Doug Lea to defend
+    // against bad hashes.
+    h += ~(h << 9);
+    h ^= (((unsigned int) h) >> 14);
+    h += (h << 4);
+    h ^= (((unsigned int) h) >> 10);
+    return (size_t)h & (HASHTABLE_SIZE - 1);
+}
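+
+/* Illustrative sketch (not part of the original patch): how a key reaches a
+ * slot. A pthread_mutex_t* key is hashed byte-by-byte, then folded; the
+ * final mask is cheap because HASHTABLE_SIZE is a power of two:
+ *
+ *     static pthread_mutex_t dummy;
+ *     pthread_mutex_t* key = &dummy;
+ *     uint32_t h = get_hashcode(&key, sizeof(key));
+ *     size_t slot = get_index(h);   // always in [0, HASHTABLE_SIZE)
+ */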
+
+/****************************************************************************/
+
+static void hashmap_init(HashTable* table) {
+    memset(table, 0, sizeof(HashTable));
+}
+
+static void hashmap_removeEntry(HashTable* table, HashEntry* entry)
+{
+    HashEntry* prev = entry->prev;
+    HashEntry* next = entry->next;
+    if (prev != NULL) entry->prev->next = next;
+    if (next != NULL) entry->next->prev = prev;
+    if (prev == NULL) {
+        // we are the head of the list. set the head to be next
+        table->slots[entry->slot] = entry->next;
+    }
+}
+
+static HashEntry* hashmap_lookup(HashTable* table,
+        void const* key, size_t ksize,
+        int (*equals)(void const* data, void const* key))
+{
+    const uint32_t hash = get_hashcode(key, ksize);
+    const size_t slot = get_index(hash);
+
+    HashEntry* entry = table->slots[slot];
+    while (entry) {
+        if (equals(entry->data, key)) {
+            break;
+        }
+        entry = entry->next;
+    }
+
+    if (entry == NULL) {
+        // create a new entry
+        entry = (HashEntry*)DbgAllocLocked(sizeof(HashEntry));
+        entry->data = NULL;
+        entry->slot = slot;
+        entry->prev = NULL;
+        entry->next = table->slots[slot];
+        if (entry->next != NULL) {
+            entry->next->prev = entry;
+        }
+        table->slots[slot] = entry;
+    }
+    return entry;
+}
+
+/****************************************************************************/
+
+static int MutexInfo_equals(void const* data, void const* key) {
+    return ((MutexInfo const *)data)->mutex == *(pthread_mutex_t **)key;
+}
+
+static MutexInfo* get_mutex_info(pthread_mutex_t *mutex)
+{
+    pthread_mutex_lock_unchecked(&sDbgLock);
+
+    HashEntry* entry = hashmap_lookup(&sMutexMap,
+            &mutex, sizeof(mutex),
+            &MutexInfo_equals);
+    if (entry->data == NULL) {
+        entry->data = (MutexInfo*)DbgAllocLocked(sizeof(MutexInfo));
+        initMutexInfo(entry->data, mutex);
+    }
+
+    pthread_mutex_unlock_unchecked(&sDbgLock);
+
+    return (MutexInfo *)entry->data;
+}
+
+/****************************************************************************/
+
+static int ThreadInfo_equals(void const* data, void const* key) {
+    return ((ThreadInfo const *)data)->pid == *(pid_t *)key;
+}
+
+static ThreadInfo* get_thread_info(pid_t pid)
+{
+    pthread_mutex_lock_unchecked(&sDbgLock);
+
+    HashEntry* entry = hashmap_lookup(&sThreadMap,
+            &pid, sizeof(pid),
+            &ThreadInfo_equals);
+    if (entry->data == NULL) {
+        entry->data = (ThreadInfo*)DbgAllocLocked(sizeof(ThreadInfo));
+        initThreadInfo(entry->data, pid);
+    }
+
+    pthread_mutex_unlock_unchecked(&sDbgLock);
+
+    return (ThreadInfo *)entry->data;
+}
+
+static void push_most_recently_locked(MutexInfo* mrl) {
+    ThreadInfo* tinfo = get_thread_info(gettid());
+    mrl->next = NULL;
+    mrl->prev = tinfo->mrl;
+    tinfo->mrl = mrl;
+}
+
+static void remove_most_recently_locked(MutexInfo* mrl) {
+    ThreadInfo* tinfo = get_thread_info(gettid());
+    if (mrl->next) {
+        (mrl->next)->prev = mrl->prev;
+    }
+    if (mrl->prev) {
+        (mrl->prev)->next = mrl->next;
+    }
+    if (tinfo->mrl == mrl) {
+        tinfo->mrl = mrl->next;
+    }
+}
+
+static MutexInfo* get_most_recently_locked() {
+    ThreadInfo* tinfo = get_thread_info(gettid());
+    return tinfo->mrl;
+}
+
+/****************************************************************************/
+
+/* pthread_debug_init() is called from libc_init_dynamic() just
+ * after system properties have been initialized
+ */
+
+__LIBC_HIDDEN__
+void pthread_debug_init(void) {
+    char env[PROP_VALUE_MAX];
+    if (__system_property_get("debug.libc.pthread", env)) {
+        int level = atoi(env);
+        if (level) {
+            LOGI("pthread deadlock detection level %d enabled for pid %d (%s)",
+                    level, getpid(), __progname);
+            hashmap_init(&sMutexMap);
+            sPthreadDebugLevel = level;
+        }
+    }
+}
+
+/*
+ * See if we were allowed to grab the lock at this time.  We do it
+ * *after* acquiring the lock, rather than before, so that we can
+ * freely update the MutexInfo struct.  This seems counter-intuitive,
+ * but our goal is deadlock *prediction* not deadlock *prevention*.
+ * (If we actually deadlock, the situation is easy to diagnose from
+ * a thread dump, so there's no point making a special effort to do
+ * the checks before the lock is held.)
+ */
+
+__LIBC_HIDDEN__
+void pthread_debug_mutex_lock_check(pthread_mutex_t *mutex)
+{
+    if (sPthreadDebugLevel == 0) return;
+    // prediction disabled for this thread
+    if (sPthreadDebugDisabledThread == gettid())
+        return;
+    MutexInfo* object = get_mutex_info(mutex);
+    MutexInfo* mrl = get_most_recently_locked();
+    mutex_lock_checked(mrl, object);
+    push_most_recently_locked(object);
+}
+
+/*
+ * pthread_debug_mutex_unlock_check() must be called with the mutex
+ * still held (ie: before calling the real unlock)
+ */
+
+__LIBC_HIDDEN__
+void pthread_debug_mutex_unlock_check(pthread_mutex_t *mutex)
+{
+    if (sPthreadDebugLevel == 0) return;
+    // prediction disabled for this thread
+    if (sPthreadDebugDisabledThread == gettid())
+        return;
+    MutexInfo* object = get_mutex_info(mutex);
+    remove_most_recently_locked(object);
+    mutex_unlock_checked(object);
+}
diff --git a/libc/bionic/semaphore.c b/libc/bionic/semaphore.c
index 96819ae..9bc8412 100644
--- a/libc/bionic/semaphore.c
+++ b/libc/bionic/semaphore.c
@@ -174,7 +174,7 @@
 
         new = SEMCOUNT_DECREMENT(old);
     }
-    while (__atomic_cmpxchg((int)(old|shared),
+    while (__bionic_cmpxchg((int)(old|shared),
                             (int)(new|shared),
                             (volatile int *)pvalue) != 0);
     return ret;
@@ -198,7 +198,7 @@
 
         new = SEMCOUNT_DECREMENT(old);
     }
-    while (__atomic_cmpxchg((int)(old|shared),
+    while (__bionic_cmpxchg((int)(old|shared),
                             (int)(new|shared),
                             (volatile int *)pvalue) != 0);
 
@@ -235,7 +235,7 @@
         else
             new = SEMCOUNT_INCREMENT(old);
     }
-    while ( __atomic_cmpxchg((int)(old|shared),
+    while ( __bionic_cmpxchg((int)(old|shared),
                              (int)(new|shared),
                              (volatile int*)pvalue) != 0);
 
diff --git a/libc/bionic/stubs.c b/libc/bionic/stubs.c
index 5f63427..cc4c04e 100644
--- a/libc/bionic/stubs.c
+++ b/libc/bionic/stubs.c
@@ -250,30 +250,58 @@
 static unsigned
 app_id_from_name( const char*  name )
 {
-    unsigned long  id;
+    unsigned long  userid;
+    unsigned long  appid;
     char*          end;
 
-    if (memcmp(name, "app_", 4) != 0 || !isdigit(name[4]))
+    if (name[0] != 'u' || !isdigit(name[1]))
         goto FAIL;
 
-    id = strtoul(name+4, &end, 10);
-    if (*end != '\0')
+    userid = strtoul(name+1, &end, 10);
+    if (end[0] != '_' || end[1] == 0 || !isdigit(end[2]))
         goto FAIL;
 
-    id += AID_APP;
-
-    /* check for overflow and that the value can be
-     * stored in our 32-bit uid_t/gid_t */
-    if (id < AID_APP || (unsigned)id != id)
+    if (end[1] == 'a')
+        appid = strtoul(end+2, &end, 10) + AID_APP;
+    else if (end[1] == 'i')
+        appid = strtoul(end+2, &end, 10) + AID_ISOLATED_START;
+    else
         goto FAIL;
 
-    return (unsigned)id;
+    if (end[0] != 0)
+        goto FAIL;
+
+    /* check that user id won't overflow */
+    if (userid > 1000)
+        goto FAIL;
+
+    /* check that app id is within range */
+    if (appid < AID_APP || appid >= AID_USER)
+        goto FAIL;
+
+    return (unsigned)(appid + userid*AID_USER);
 
 FAIL:
     errno = ENOENT;
     return 0;
 }
 
+static void
+print_app_uid_name(uid_t  uid, char* buffer, int bufferlen)
+{
+    uid_t appid;
+    uid_t userid;
+
+    appid = uid % AID_USER;
+    userid = uid / AID_USER;
+
+    if (appid < AID_ISOLATED_START) {
+        snprintf(buffer, bufferlen, "u%u_a%u", userid, appid - AID_APP);
+    } else {
+        snprintf(buffer, bufferlen, "u%u_i%u", userid, appid - AID_ISOLATED_START);
+    }
+}
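+
+/* Illustrative sketch (not part of the original patch): a worked example of
+ * the encoding used above, assuming the platform's customary constants
+ * AID_APP = 10000, AID_USER = 100000 and AID_ISOLATED_START = 99000 (from
+ * android_filesystem_config.h, not shown in this patch):
+ *
+ *     "u1_a123" -> userid = 1, appid = 10000 + 123 -> uid 110123
+ *     "u0_i5"   -> userid = 0, appid = 99000 + 5   -> uid 99005
+ *
+ * print_app_uid_name() inverts this: appid = 110123 % 100000 = 10123 and
+ * userid = 110123 / 100000 = 1, printed as "u1_a123" since
+ * 10123 - AID_APP = 123.
+ */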
+
 /* translate a uid into the corresponding app_<uid>
  * passwd structure (sets errno to ENOENT on failure)
  */
@@ -287,8 +315,7 @@
         return NULL;
     }
 
-    snprintf( state->app_name_buffer, sizeof state->app_name_buffer,
-              "app_%u", uid - AID_APP );
+    print_app_uid_name(uid, state->app_name_buffer, sizeof state->app_name_buffer);
 
     pw->pw_name  = state->app_name_buffer;
     pw->pw_dir   = "/data";
@@ -306,14 +333,15 @@
 app_id_to_group(gid_t  gid, stubs_state_t*  state)
 {
     struct group*  gr = &state->group;
 
     if (gid < AID_APP) {
         errno = ENOENT;
         return NULL;
     }
 
-    snprintf(state->group_name_buffer, sizeof state->group_name_buffer,
-             "app_%u", gid - AID_APP);
+    print_app_uid_name(gid, state->group_name_buffer, sizeof state->group_name_buffer);
 
     gr->gr_name   = state->group_name_buffer;
     gr->gr_gid    = gid;
diff --git a/libc/include/pthread.h b/libc/include/pthread.h
index 9d05769..2015ac0 100644
--- a/libc/include/pthread.h
+++ b/libc/include/pthread.h
@@ -42,9 +42,13 @@
     int volatile value;
 } pthread_mutex_t;
 
-#define  PTHREAD_MUTEX_INITIALIZER             {0}
-#define  PTHREAD_RECURSIVE_MUTEX_INITIALIZER   {0x4000}
-#define  PTHREAD_ERRORCHECK_MUTEX_INITIALIZER  {0x8000}
+#define  __PTHREAD_MUTEX_INIT_VALUE            0
+#define  __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE  0x4000
+#define  __PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE 0x8000
+
+#define  PTHREAD_MUTEX_INITIALIZER             {__PTHREAD_MUTEX_INIT_VALUE}
+#define  PTHREAD_RECURSIVE_MUTEX_INITIALIZER   {__PTHREAD_RECURSIVE_MUTEX_INIT_VALUE}
+#define  PTHREAD_ERRORCHECK_MUTEX_INITIALIZER  {__PTHREAD_ERRORCHECK_MUTEX_INIT_VALUE}
 
 enum {
     PTHREAD_MUTEX_NORMAL = 0,
diff --git a/libc/include/resolv.h b/libc/include/resolv.h
index bb21c23..7c34012 100644
--- a/libc/include/resolv.h
+++ b/libc/include/resolv.h
@@ -50,21 +50,6 @@
 extern int   b64_ntop(u_char const *, size_t, char *, size_t);
 extern int   b64_pton(char const *, u_char *, size_t);
 
-/* Set name of default interface */
-extern void _resolv_set_default_iface(const char* ifname);
-
-/* set name servers for an interface */
-extern void _resolv_set_nameservers_for_iface(const char* ifname, char** servers, int numservers);
-
-/* tell resolver of the address of an interface */
-extern void _resolv_set_addr_of_iface(const char* ifname, struct in_addr* addr);
-
-/* flush the cache associated with the default interface */
-extern void _resolv_flush_cache_for_default_iface();
-
-/* flush the cache associated with a certain interface */
-extern void _resolv_flush_cache_for_iface(const char* ifname);
-
 __END_DECLS
 
 #endif /* _RESOLV_H_ */
diff --git a/libc/include/stdio.h b/libc/include/stdio.h
index 4006882..8d3d5d7 100644
--- a/libc/include/stdio.h
+++ b/libc/include/stdio.h
@@ -452,8 +452,10 @@
  * fdprintf is a better name, and some programs that use fdprintf use a
  * #define fdprintf dprintf for compatibility
  */
+__BEGIN_DECLS
 int fdprintf(int, const char*, ...);
 int vfdprintf(int, const char*, __va_list);
+__END_DECLS
 #endif /* _GNU_SOURCE */
 
 #endif /* _STDIO_H_ */
diff --git a/libc/include/stdlib.h b/libc/include/stdlib.h
index 5dc8a87..e5caadd 100644
--- a/libc/include/stdlib.h
+++ b/libc/include/stdlib.h
@@ -67,6 +67,8 @@
 extern unsigned long long strtoull(const char *, char **, int);
 extern double strtod(const char *nptr, char **endptr);
 
+extern int posix_memalign(void **memptr, size_t alignment, size_t size);
+
 static __inline__ float strtof(const char *nptr, char **endptr)
 {
     return (float)strtod(nptr, endptr);
diff --git a/libc/include/sys/atomics.h b/libc/include/sys/atomics.h
index d3fa145..3ada8de 100644
--- a/libc/include/sys/atomics.h
+++ b/libc/include/sys/atomics.h
@@ -33,10 +33,48 @@
 
 __BEGIN_DECLS
 
-extern int __atomic_cmpxchg(int old, int _new, volatile int *ptr);
-extern int __atomic_swap(int _new, volatile int *ptr);
-extern int __atomic_dec(volatile int *ptr);
-extern int __atomic_inc(volatile int *ptr);
+/* Note: atomic operations that were exported by the C library didn't
+ *       provide any memory barriers, which created potential issues on
+ *       multi-core devices. We now define them as inlined calls to
+ *       GCC sync builtins, which always provide a full barrier.
+ *
+ *       NOTE: The C library still exports atomic functions by the same
+ *       name to ensure ABI stability for existing NDK machine code.
+ *
+ *       If you are an NDK developer, we encourage you to rebuild your
+ *       unmodified sources against this header as soon as possible.
+ */
+#define __ATOMIC_INLINE__ static __inline__ __attribute__((always_inline))
+
+__ATOMIC_INLINE__ int
+__atomic_cmpxchg(int old, int _new, volatile int *ptr)
+{
+    /* We must return 0 on success */
+    return __sync_val_compare_and_swap(ptr, old, _new) != old;
+}
+
+__ATOMIC_INLINE__ int
+__atomic_swap(int _new, volatile int *ptr)
+{
+    int prev;
+    do {
+        prev = *ptr;
+    } while (__sync_val_compare_and_swap(ptr, prev, _new) != prev);
+    return prev;
+}
+
+__ATOMIC_INLINE__ int
+__atomic_dec(volatile int *ptr)
+{
+    return __sync_fetch_and_sub(ptr, 1);
+}
+
+__ATOMIC_INLINE__ int
+__atomic_inc(volatile int *ptr)
+{
+    return __sync_fetch_and_add(ptr, 1);
+}
+
 
 int __futex_wait(volatile void *ftx, int val, const struct timespec *timeout);
 int __futex_wake(volatile void *ftx, int count);
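+
+/* Illustrative sketch (not part of the original patch): with the inlines
+ * above, __atomic_cmpxchg returns 0 on success and implies a full barrier,
+ * so a minimal spinlock can be built directly on it:
+ *
+ *     static volatile int lock = 0;
+ *
+ *     static void spin_lock(void) {
+ *         while (__atomic_cmpxchg(0, 1, &lock) != 0)
+ *             ;   // spin until we swing 0 -> 1
+ *     }
+ *
+ *     static void spin_unlock(void) {
+ *         __atomic_swap(0, &lock);   // 1 -> 0, full barrier via the builtin
+ *     }
+ */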
diff --git a/libc/include/sys/linux-syscalls.h b/libc/include/sys/linux-syscalls.h
index 0cb3100..9f23054 100644
--- a/libc/include/sys/linux-syscalls.h
+++ b/libc/include/sys/linux-syscalls.h
@@ -212,6 +212,7 @@
 #define __NR_waitid                       (__NR_SYSCALL_BASE + 284)
 #define __NR_kill                         (__NR_SYSCALL_BASE + 37)
 #define __NR_tkill                        (__NR_SYSCALL_BASE + 238)
+#define __NR_tgkill                       (__NR_SYSCALL_BASE + 270)
 #define __NR_set_thread_area              (__NR_SYSCALL_BASE + 243)
 #define __NR_openat                       (__NR_SYSCALL_BASE + 295)
 #define __NR_madvise                      (__NR_SYSCALL_BASE + 219)
diff --git a/libc/include/sys/linux-unistd.h b/libc/include/sys/linux-unistd.h
deleted file mode 100644
index 246d7fe..0000000
--- a/libc/include/sys/linux-unistd.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* auto-generated by gensyscalls.py, do not touch */
-#ifndef _BIONIC_LINUX_UNISTD_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void             _exit (int);
-void             _exit_thread (int);
-pid_t            __fork (void);
-pid_t            _waitpid (pid_t, int*, int, struct rusage*);
-int              __waitid (int, pid_t, struct siginfo_t*, int,void*);
-pid_t            __sys_clone (int, void*, int*, void*, int*);
-int              execve (const char*, char* const*, char* const*);
-int              __setuid (uid_t);
-uid_t            getuid (void);
-gid_t            getgid (void);
-uid_t            geteuid (void);
-gid_t            getegid (void);
-uid_t            getresuid (void);
-gid_t            getresgid (void);
-pid_t            gettid (void);
-ssize_t          readahead (int, off64_t, size_t);
-int              getgroups (int, gid_t *);
-pid_t            getpgid (pid_t);
-pid_t            getppid (void);
-pid_t            setsid (void);
-int              setgid (gid_t);
-int              seteuid (uid_t);
-int              __setreuid (uid_t, uid_t);
-int              __setresuid (uid_t, uid_t, uid_t);
-int              setresgid (gid_t, gid_t, gid_t);
-void*            __brk (void*);
-int              kill (pid_t, int);
-int              tkill (pid_t tid, int sig);
-int              __ptrace (int request, int pid, void* addr, void* data);
-int              __set_thread_area (void*  user_desc);
-int              __getpriority (int, int);
-int              setpriority (int, int, int);
-int              setrlimit (int resource, const struct rlimit *rlp);
-int              getrlimit (int resource, struct rlimit *rlp);
-int              getrusage (int who, struct rusage*  r_usage);
-int              setgroups (int, const gid_t *);
-pid_t            getpgrp (void);
-int              setpgid (pid_t, pid_t);
-pid_t            vfork (void);
-int              setregid (gid_t, gid_t);
-int              chroot (const char *);
-int              prctl (int option, unsigned int arg2, unsigned int arg3, unsigned int arg4, unsigned int arg5);
-int              capget (cap_user_header_t header, cap_user_data_t data);
-int              capset (cap_user_header_t header, const cap_user_data_t data);
-int              sigaltstack (const stack_t*, stack_t*);
-int              acct (const char*  filepath);
-ssize_t          read (int, void*, size_t);
-ssize_t          write (int, const void*, size_t);
-ssize_t          pread64 (int, void *, size_t, off64_t);
-ssize_t          pwrite64 (int, void *, size_t, off64_t);
-int              __open (const char*, int, mode_t);
-int              __openat (int, const char*, int, mode_t);
-int              close (int);
-int              creat (const char*, mode_t);
-off_t            lseek (int, off_t, int);
-int              __llseek (int, unsigned long, unsigned long, loff_t*, int);
-pid_t            getpid (void);
-void *           mmap (void *, size_t, int, int, int, long);
-void *           __mmap2 (void*, size_t, int, int, int, long);
-int              munmap (void *, size_t);
-void *           mremap (void *, size_t, size_t, unsigned long);
-int              msync (const void *, size_t, int);
-int              mprotect (const void *, size_t, int);
-int              madvise (const void *, size_t, int);
-int              mlock (const void *addr, size_t len);
-int              munlock (const void *addr, size_t len);
-int              mincore (void*  start, size_t  length, unsigned char*  vec);
-int              __ioctl (int, int, void *);
-int              readv (int, const struct iovec *, int);
-int              writev (int, const struct iovec *, int);
-int              __fcntl (int, int, void*);
-int              flock (int, int);
-int              fchmod (int, mode_t);
-int              dup (int);
-int              pipe (int *);
-int              pipe2 (int *, int);
-int              dup2 (int, int);
-int              select (int, struct fd_set *, struct fd_set *, struct fd_set *, struct timeval *);
-int              ftruncate (int, off_t);
-int              ftruncate64 (int, off64_t);
-int              getdents (unsigned int, struct dirent *, unsigned int);
-int              fsync (int);
-int              fdatasync (int);
-int              fchown (int, uid_t, gid_t);
-void             sync (void);
-int              __fcntl64 (int, int, void *);
-int              __fstatfs64 (int, size_t, struct statfs *);
-ssize_t          sendfile (int out_fd, int in_fd, off_t *offset, size_t count);
-int              fstatat (int dirfd, const char *path, struct stat *buf, int flags);
-int              mkdirat (int dirfd, const char *pathname, mode_t mode);
-int              fchownat (int dirfd, const char *path, uid_t owner, gid_t group, int flags);
-int              fchmodat (int dirfd, const char *path, mode_t mode, int flags);
-int              renameat (int olddirfd, const char *oldpath, int newdirfd, const char *newpath);
-int              fsetxattr (int, const char *, const void *, size_t, int);
-ssize_t          fgetxattr (int, const char *, void *, size_t);
-ssize_t          flistxattr (int, char *, size_t);
-int              fremovexattr (int, const char *);
-int              link (const char*, const char*);
-int              unlink (const char*);
-int              unlinkat (int, const char *, int);
-int              chdir (const char*);
-int              mknod (const char*, mode_t, dev_t);
-int              chmod (const char*,mode_t);
-int              chown (const char *, uid_t, gid_t);
-int              lchown (const char*, uid_t, gid_t);
-int              mount (const char*, const char*, const char*, unsigned long, const void*);
-int              umount (const char*);
-int              umount2 (const char*, int);
-int              fstat (int, struct stat*);
-int              stat (const char *, struct stat *);
-int              lstat (const char *, struct stat *);
-int              mkdir (const char *, mode_t);
-int              readlink (const char *, char *, size_t);
-int              rmdir (const char *);
-int              rename (const char *, const char *);
-int              __getcwd (char * buf, size_t size);
-int              access (const char *, int);
-int              symlink (const char *, const char *);
-int              fchdir (int);
-int              truncate (const char*, off_t);
-int              setxattr (const char *, const char *, const void *, size_t, int);
-int              lsetxattr (const char *, const char *, const void *, size_t, int);
-ssize_t          getxattr (const char *, const char *, void *, size_t);
-ssize_t          lgetxattr (const char *, const char *, void *, size_t);
-ssize_t          listxattr (const char *, char *, size_t);
-ssize_t          llistxattr (const char *, char *, size_t);
-int              removexattr (const char *, const char *);
-int              lremovexattr (const char *, const char *);
-int              __statfs64 (const char *, size_t, struct statfs *);
-int              pause (void);
-int              gettimeofday (struct timeval*, struct timezone*);
-int              settimeofday (const struct timeval*, const struct timezone*);
-clock_t          times (struct tms *);
-int              nanosleep (const struct timespec *, struct timespec *);
-int              clock_gettime (clockid_t clk_id, struct timespec *tp);
-int              clock_settime (clockid_t clk_id, const struct timespec *tp);
-int              clock_getres (clockid_t clk_id, struct timespec *res);
-int              clock_nanosleep (const struct timespec *req, struct timespec *rem);
-int              getitimer (int, const struct itimerval *);
-int              setitimer (int, const struct itimerval *, struct itimerval *);
-int              __timer_create (clockid_t clockid, struct sigevent *evp, timer_t *timerid);
-int              __timer_settime (timer_t, int, const struct itimerspec*, struct itimerspec*);
-int              __timer_gettime (timer_t, struct itimerspec*);
-int              __timer_getoverrun (timer_t);
-int              __timer_delete (timer_t);
-int              utimes (const char*, const struct timeval tvp[2]);
-int              utimensat (int, const char *, const struct timespec times[2], int);
-int              sigaction (int, const struct sigaction *, struct sigaction *);
-int              sigprocmask (int, const sigset_t *, sigset_t *);
-int              __sigsuspend (int unused1, int unused2, unsigned mask);
-int              __rt_sigaction (int sig, const struct sigaction *act, struct sigaction *oact, size_t sigsetsize);
-int              __rt_sigprocmask (int  how, const sigset_t *set, sigset_t *oset, size_t sigsetsize);
-int              __rt_sigtimedwait (const sigset_t *set, struct siginfo_t  *info, struct timespec_t  *timeout, size_t  sigset_size);
-int              sigpending (sigset_t *);
-int              socket (int, int, int);
-int              socketpair (int, int, int, int*);
-int              bind (int, struct sockaddr *, int);
-int              connect (int, struct sockaddr *, socklen_t);
-int              listen (int, int);
-int              accept (int, struct sockaddr *, socklen_t *);
-int              getsockname (int, struct sockaddr *, socklen_t *);
-int              getpeername (int, struct sockaddr *, socklen_t *);
-int              sendto (int, const void *, size_t, int, const struct sockaddr *, socklen_t);
-int              recvfrom (int, void *, size_t, unsigned int, struct sockaddr *, socklen_t *);
-int              shutdown (int, int);
-int              setsockopt (int, int, int, const void *, socklen_t);
-int              getsockopt (int, int, int, void *, socklen_t *);
-int              sendmsg (int, const struct msghdr *, unsigned int);
-int              recvmsg (int, struct msghdr *, unsigned int);
-int              socket (int, int, int);
-int              bind (int, struct sockaddr *, int);
-int              connect (int, struct sockaddr *, socklen_t);
-int              listen (int, int);
-int              accept (int, struct sockaddr *, socklen_t *);
-int              getsockname (int, struct sockaddr *, socklen_t *);
-int              getpeername (int, struct sockaddr *, socklen_t *);
-int              socketpair (int, int, int, int*);
-int              sendto (int, const void *, size_t, int, const struct sockaddr *, socklen_t);
-int              recvfrom (int, void *, size_t, unsigned int, struct sockaddr *, socklen_t *);
-int              shutdown (int, int);
-int              setsockopt (int, int, int, const void *, socklen_t);
-int              getsockopt (int, int, int, void *, socklen_t *);
-int              sendmsg (int, const struct msghdr *, unsigned int);
-int              recvmsg (int, struct msghdr *, unsigned int);
-int              __socketcall (int, unsigned long*);
-int              sched_setscheduler (pid_t pid, int policy, const struct sched_param *param);
-int              sched_getscheduler (pid_t pid);
-int              sched_yield (void);
-int              sched_setparam (pid_t pid, const struct sched_param *param);
-int              sched_getparam (pid_t pid, struct sched_param *param);
-int              sched_get_priority_max (int policy);
-int              sched_get_priority_min (int policy);
-int              sched_rr_get_interval (pid_t pid, struct timespec *interval);
-int              sched_setaffinity (pid_t pid, size_t setsize, const cpu_set_t* set);
-int              __sched_getaffinity (pid_t pid, size_t setsize, cpu_set_t* set);
-int              __getcpu (unsigned *cpu, unsigned *node, void *unused);
-int              ioprio_set (int which, int who, int ioprio);
-int              ioprio_get (int which, int who);
-int              uname (struct utsname *);
-pid_t            __wait4 (pid_t pid, int *status, int options, struct rusage *rusage);
-mode_t           umask (mode_t);
-int              __reboot (int, int, int, void *);
-int              __syslog (int, char *, int);
-int              init_module (void *, unsigned long, const char *);
-int              delete_module (const char*, unsigned int);
-int              klogctl (int, char *, int);
-int              sysinfo (struct sysinfo *);
-int              personality (unsigned long);
-int              futex (void *, int, int, void *, void *, int);
-int              epoll_create (int size);
-int              epoll_ctl (int epfd, int op, int fd, struct epoll_event *event);
-int              epoll_wait (int epfd, struct epoll_event *events, int max, int timeout);
-int              inotify_init (void);
-int              inotify_add_watch (int, const char *, unsigned int);
-int              inotify_rm_watch (int, unsigned int);
-int              poll (struct pollfd *, unsigned int, long);
-int              eventfd (unsigned int, int);
-int              __set_tls (void*);
-int              cacheflush (long start, long end, long flags);
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _BIONIC_LINUX_UNISTD_H_ */
diff --git a/libc/include/sys/personality.h b/libc/include/sys/personality.h
index 2199fa7..7772550 100644
--- a/libc/include/sys/personality.h
+++ b/libc/include/sys/personality.h
@@ -29,56 +29,10 @@
 #ifndef _SYS_PERSONALITY_H_
 #define _SYS_PERSONALITY_H_
 
+#include <linux/personality.h>
+
 __BEGIN_DECLS
 
-/* constants taken from linux-3.0.4/include/linux/personality.h */
-
-enum {
-        UNAME26 =               0x0020000,
-        ADDR_NO_RANDOMIZE =     0x0040000,
-        FDPIC_FUNCPTRS =        0x0080000,
-        MMAP_PAGE_ZERO =        0x0100000,
-        ADDR_COMPAT_LAYOUT =    0x0200000,
-        READ_IMPLIES_EXEC =     0x0400000,
-        ADDR_LIMIT_32BIT =      0x0800000,
-        SHORT_INODE =           0x1000000,
-        WHOLE_SECONDS =         0x2000000,
-        STICKY_TIMEOUTS =       0x4000000,
-        ADDR_LIMIT_3GB =        0x8000000,
-};
-
-#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC  | \
-                            ADDR_NO_RANDOMIZE  | \
-                            ADDR_COMPAT_LAYOUT | \
-                            MMAP_PAGE_ZERO)
-
-enum {
-        PER_LINUX =             0x0000,
-        PER_LINUX_32BIT =       0x0000 | ADDR_LIMIT_32BIT,
-        PER_LINUX_FDPIC =       0x0000 | FDPIC_FUNCPTRS,
-        PER_SVR4 =              0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-        PER_SVR3 =              0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
-        PER_SCOSVR3 =           0x0003 | STICKY_TIMEOUTS |
-                                         WHOLE_SECONDS | SHORT_INODE,
-        PER_OSR5 =              0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
-        PER_WYSEV386 =          0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
-        PER_ISCR4 =             0x0005 | STICKY_TIMEOUTS,
-        PER_BSD =               0x0006,
-        PER_SUNOS =             0x0006 | STICKY_TIMEOUTS,
-        PER_XENIX =             0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
-        PER_LINUX32 =           0x0008,
-        PER_LINUX32_3GB =       0x0008 | ADDR_LIMIT_3GB,
-        PER_IRIX32 =            0x0009 | STICKY_TIMEOUTS,
-        PER_IRIXN32 =           0x000a | STICKY_TIMEOUTS,
-        PER_IRIX64 =            0x000b | STICKY_TIMEOUTS,
-        PER_RISCOS =            0x000c,
-        PER_SOLARIS =           0x000d | STICKY_TIMEOUTS,
-        PER_UW7 =               0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-        PER_OSF4 =              0x000f,
-        PER_HPUX =              0x0010,
-        PER_MASK =              0x00ff,
-};
-
 extern int personality (unsigned long persona);
 
 __END_DECLS
diff --git a/libc/include/time.h b/libc/include/time.h
index 4ad4f7b..8867b32 100644
--- a/libc/include/time.h
+++ b/libc/include/time.h
@@ -79,24 +79,6 @@
 extern char*       strptime(const char *buf, const char *fmt, struct tm *tm);
 extern size_t      strftime(char *s, size_t max, const char *format, const struct tm *tm);
 
-/* ANDROID-BEGIN */
-struct strftime_locale {
-    const char *  mon[12];
-    const char *  month[12];
-    const char *  standalone_month[12];
-    const char *  wday[7];
-    const char *  weekday[7];
-    const char *  X_fmt;
-    const char *  x_fmt;
-    const char *  c_fmt;
-    const char *  am;
-    const char *  pm;
-    const char *  date_fmt;
-};
-
-extern size_t      strftime_tz(char *s, size_t max, const char *format, const struct tm *tm, const struct strftime_locale*  lc);
-/* ANDROID-END */
-
 extern char *ctime(const time_t *timep);
 extern char *ctime_r(const time_t *timep, char *buf);
 
diff --git a/libc/kernel/README.TXT b/libc/kernel/README.TXT
index 76dfa4d..9ff97d6 100644
--- a/libc/kernel/README.TXT
+++ b/libc/kernel/README.TXT
@@ -56,7 +56,7 @@
 
   * tools/update_all.py
     automatically update all clean headers from the content of 
-    'bionic/kernel/original'. this is the script you're likely going to 
+    'external/kernel-headers/original'. this is the script you're likely going to
     run whenever you update the original headers.
 
 NOTE:
diff --git a/libc/kernel/common/linux/capability.h b/libc/kernel/common/linux/capability.h
index 605bc27..7350e7c 100644
--- a/libc/kernel/common/linux/capability.h
+++ b/libc/kernel/common/linux/capability.h
@@ -7,89 +7,114 @@
  ***   structures, and macros generated from the original header, and thus,
  ***   contains no copyrightable information.
  ***
+ ***   To edit the content of this header, modify the corresponding
+ ***   source file (e.g. under external/kernel-headers/original/) then
+ ***   run bionic/libc/kernel/tools/update_all.py
+ ***
+ ***   Any manual change here will be lost the next time this script is
+ ***   run. You've been warned!
+ ***
  ****************************************************************************
  ****************************************************************************/
 #ifndef _LINUX_CAPABILITY_H
 #define _LINUX_CAPABILITY_H
-
 #include <linux/types.h>
-#include <linux/compiler.h>
-
-#define _LINUX_CAPABILITY_VERSION 0x19980330
-
+struct task_struct;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define _LINUX_CAPABILITY_VERSION_1 0x19980330
+#define _LINUX_CAPABILITY_U32S_1 1
+#define _LINUX_CAPABILITY_VERSION_2 0x20071026  
+#define _LINUX_CAPABILITY_U32S_2 2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define _LINUX_CAPABILITY_VERSION_3 0x20080522
+#define _LINUX_CAPABILITY_U32S_3 2
 typedef struct __user_cap_header_struct {
  __u32 version;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  int pid;
 } __user *cap_user_header_t;
-
 typedef struct __user_cap_data_struct {
  __u32 effective;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  __u32 permitted;
  __u32 inheritable;
 } __user *cap_user_data_t;
-
+#define VFS_CAP_REVISION_MASK 0xFF000000
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define VFS_CAP_REVISION_SHIFT 24
+#define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK
+#define VFS_CAP_FLAGS_EFFECTIVE 0x000001
+#define VFS_CAP_REVISION_1 0x01000000
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define VFS_CAP_U32_1 1
+#define XATTR_CAPS_SZ_1 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_1))
+#define VFS_CAP_REVISION_2 0x02000000
+#define VFS_CAP_U32_2 2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define XATTR_CAPS_SZ_2 (sizeof(__le32)*(1 + 2*VFS_CAP_U32_2))
+#define XATTR_CAPS_SZ XATTR_CAPS_SZ_2
+#define VFS_CAP_U32 VFS_CAP_U32_2
+#define VFS_CAP_REVISION VFS_CAP_REVISION_2
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+struct vfs_cap_data {
+ __le32 magic_etc;
+ struct {
+ __le32 permitted;
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+ __le32 inheritable;
+ } data[VFS_CAP_U32];
+};
+#define _LINUX_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_1
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define _LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_1
 #define CAP_CHOWN 0
-
 #define CAP_DAC_OVERRIDE 1
-
 #define CAP_DAC_READ_SEARCH 2
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_FOWNER 3
-
 #define CAP_FSETID 4
-
-#define CAP_FS_MASK 0x1f
-
 #define CAP_KILL 5
-
 #define CAP_SETGID 6
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_SETUID 7
-
 #define CAP_SETPCAP 8
-
 #define CAP_LINUX_IMMUTABLE 9
-
 #define CAP_NET_BIND_SERVICE 10
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_NET_BROADCAST 11
-
 #define CAP_NET_ADMIN 12
-
 #define CAP_NET_RAW 13
-
 #define CAP_IPC_LOCK 14
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_IPC_OWNER 15
-
 #define CAP_SYS_MODULE 16
-
 #define CAP_SYS_RAWIO 17
-
 #define CAP_SYS_CHROOT 18
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_SYS_PTRACE 19
-
 #define CAP_SYS_PACCT 20
-
 #define CAP_SYS_ADMIN 21
-
 #define CAP_SYS_BOOT 22
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_SYS_NICE 23
-
 #define CAP_SYS_RESOURCE 24
-
 #define CAP_SYS_TIME 25
-
 #define CAP_SYS_TTY_CONFIG 26
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 #define CAP_MKNOD 27
-
 #define CAP_LEASE 28
-
 #define CAP_AUDIT_WRITE 29
-
 #define CAP_AUDIT_CONTROL 30
-
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CAP_SETFCAP 31
+#define CAP_MAC_OVERRIDE 32
+#define CAP_MAC_ADMIN 33
+#define CAP_SYSLOG 34
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CAP_WAKE_ALARM 35
+#define CAP_LAST_CAP CAP_WAKE_ALARM
+#define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
+#define CAP_TO_INDEX(x) ((x) >> 5)  
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
+#define CAP_TO_MASK(x) (1 << ((x) & 31))  
 #endif
diff --git a/libc/kernel/common/linux/input.h b/libc/kernel/common/linux/input.h
index 4045eb0..3fc89f4 100644
--- a/libc/kernel/common/linux/input.h
+++ b/libc/kernel/common/linux/input.h
@@ -74,6 +74,8 @@
 #define EVIOCRMFF _IOW('E', 0x81, int)
 #define EVIOCGEFFECTS _IOR('E', 0x84, int)
 #define EVIOCGRAB _IOW('E', 0x90, int)
+#define EVIOCGSUSPENDBLOCK _IOR('E', 0x91, int)
+#define EVIOCSSUSPENDBLOCK _IOW('E', 0x91, int)
 #define INPUT_PROP_POINTER 0x00
 #define INPUT_PROP_DIRECT 0x01
 #define INPUT_PROP_BUTTONPAD 0x02
diff --git a/libc/kernel/common/linux/personality.h b/libc/kernel/common/linux/personality.h
index 1f84b4e..bb55b75 100644
--- a/libc/kernel/common/linux/personality.h
+++ b/libc/kernel/common/linux/personality.h
@@ -7,77 +7,65 @@
  ***   structures, and macros generated from the original header, and thus,
  ***   contains no copyrightable information.
  ***
+ ***   To edit the content of this header, modify the corresponding
+ ***   source file (e.g. under external/kernel-headers/original/) then
+ ***   run bionic/libc/kernel/tools/update_all.py
+ ***
+ ***   Any manual change here will be lost the next time this script is
+ ***   run. You've been warned!
+ ***
  ****************************************************************************
  ****************************************************************************/
 #ifndef _LINUX_PERSONALITY_H
 #define _LINUX_PERSONALITY_H
-
-struct exec_domain;
-struct pt_regs;
-
 enum {
+ UNAME26 = 0x0020000,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  ADDR_NO_RANDOMIZE = 0x0040000,
  FDPIC_FUNCPTRS = 0x0080000,
  MMAP_PAGE_ZERO = 0x0100000,
  ADDR_COMPAT_LAYOUT = 0x0200000,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  READ_IMPLIES_EXEC = 0x0400000,
  ADDR_LIMIT_32BIT = 0x0800000,
  SHORT_INODE = 0x1000000,
  WHOLE_SECONDS = 0x2000000,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  STICKY_TIMEOUTS = 0x4000000,
  ADDR_LIMIT_3GB = 0x8000000,
 };
-
-#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE)
-
+#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC |   ADDR_NO_RANDOMIZE |   ADDR_COMPAT_LAYOUT |   MMAP_PAGE_ZERO)
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
 enum {
  PER_LINUX = 0x0000,
  PER_LINUX_32BIT = 0x0000 | ADDR_LIMIT_32BIT,
  PER_LINUX_FDPIC = 0x0000 | FDPIC_FUNCPTRS,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  PER_SVR4 = 0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
  PER_SVR3 = 0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
  PER_SCOSVR3 = 0x0003 | STICKY_TIMEOUTS |
  WHOLE_SECONDS | SHORT_INODE,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  PER_OSR5 = 0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
  PER_WYSEV386 = 0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
  PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
  PER_BSD = 0x0006,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
  PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
  PER_LINUX32 = 0x0008,
  PER_LINUX32_3GB = 0x0008 | ADDR_LIMIT_3GB,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  PER_IRIX32 = 0x0009 | STICKY_TIMEOUTS,
  PER_IRIXN32 = 0x000a | STICKY_TIMEOUTS,
  PER_IRIX64 = 0x000b | STICKY_TIMEOUTS,
  PER_RISCOS = 0x000c,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  PER_SOLARIS = 0x000d | STICKY_TIMEOUTS,
  PER_UW7 = 0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
  PER_OSF4 = 0x000f,
  PER_HPUX = 0x0010,
+/* WARNING: DO NOT EDIT, AUTO-GENERATED CODE - SEE TOP FOR INSTRUCTIONS */
  PER_MASK = 0x00ff,
 };
-
-typedef void (*handler_t)(int, struct pt_regs *);
-
-struct exec_domain {
- const char *name;
- handler_t handler;
- unsigned char pers_low;
- unsigned char pers_high;
- unsigned long *signal_map;
- unsigned long *signal_invmap;
- struct map_segment *err_map;
- struct map_segment *socktype_map;
- struct map_segment *sockopt_map;
- struct map_segment *af_map;
- struct module *module;
- struct exec_domain *next;
-};
-
-#define personality(pers) (pers & PER_MASK)
-
-#define get_personality (current->personality)
-
-#define set_personality(pers)   ((current->personality == pers) ? 0 : __set_personality(pers))
-
 #endif
diff --git a/libc/kernel/tools/clean_header.py b/libc/kernel/tools/clean_header.py
index 0549fc2..22e62aa 100755
--- a/libc/kernel/tools/clean_header.py
+++ b/libc/kernel/tools/clean_header.py
@@ -37,7 +37,7 @@
         src_path = src_path[1:]
 
     if len(src_path) == 0:
-        panic( "oops, internal error, can't extract correct relative path" )
+        panic( "oops, internal error, can't extract correct relative path\n" )
 
     # convert into destination path, extracting architecture if needed
     # and the corresponding list of known static functions
diff --git a/libc/kernel/tools/update_all.py b/libc/kernel/tools/update_all.py
index 6a730a5..badef92 100755
--- a/libc/kernel/tools/update_all.py
+++ b/libc/kernel/tools/update_all.py
@@ -37,13 +37,13 @@
 progdir = find_program_dir()
 
 if len(args) == 1:
-    original_dir = arg[0]
+    original_dir = args[0]
     if not os.path.isdir(original_dir):
-        panic( "Not a directory: %s" % original_dir )
+        panic( "Not a directory: %s\n" % original_dir )
 else:
     original_dir = kernel_original_path
     if not os.path.isdir(original_dir):
-        panic( "Missing directory, please specify one through command-line: %s" % original_dir )
+        panic( "Missing directory, please specify one through command-line: %s\n" % original_dir )
 
 # find all source files in 'original'
 #
diff --git a/libc/netbsd/net/getaddrinfo.c b/libc/netbsd/net/getaddrinfo.c
index ace8c1a..6aee697 100644
--- a/libc/netbsd/net/getaddrinfo.c
+++ b/libc/netbsd/net/getaddrinfo.c
@@ -461,7 +461,7 @@
 
 	// Send the request.
 	proxy = fdopen(sock, "r+");
-	if (fprintf(proxy, "getaddrinfo %s %s %d %d %d %d",
+	if (fprintf(proxy, "0 getaddrinfo %s %s %d %d %d %d",
 		    hostname == NULL ? "^" : hostname,
 		    servname == NULL ? "^" : servname,
 		    hints == NULL ? -1 : hints->ai_flags,
@@ -1867,6 +1867,19 @@
 	free(elems);
 }
 
+static int _using_alt_dns(void)
+{
+	char propname[PROP_NAME_MAX];
+	char propvalue[PROP_VALUE_MAX];
+
+	propvalue[0] = 0;
+	snprintf(propname, sizeof(propname), "net.dns1.%d", getpid());
+	if (__system_property_get(propname, propvalue) > 0 ) {
+		return 1;
+	}
+	return 0;
+}
+
 /*ARGSUSED*/
 static int
 _dns_getaddrinfo(void *rv, void	*cb_data, va_list ap)
@@ -1909,14 +1922,12 @@
 		q.anslen = sizeof(buf->buf);
 		int query_ipv6 = 1, query_ipv4 = 1;
 		if (pai->ai_flags & AI_ADDRCONFIG) {
-			query_ipv6 = _have_ipv6();
-			query_ipv4 = _have_ipv4();
-			if (query_ipv6 == 0 && query_ipv4 == 0) {
-				// Both our IPv4 and IPv6 connectivity probes failed, which indicates
-				// that we have neither an IPv4 or an IPv6 default route (and thus no
-				// global IPv4 or IPv6 connectivity). We might be in a walled garden.
-				// Throw up our arms and ask for both A and AAAA.
-				query_ipv6 = query_ipv4 = 1;
+			// Only implement AI_ADDRCONFIG if the application is not
+			// using its own DNS servers, since our implementation
+			// only works on the default connection.
+			if (!_using_alt_dns()) {
+				query_ipv6 = _have_ipv6();
+				query_ipv4 = _have_ipv4();
 			}
 		}
 		if (query_ipv6) {
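
For context, a minimal caller-side sketch (not part of this patch; hostname and port are placeholders) of how AI_ADDRCONFIG reaches the code above: an application setting the flag only gets A/AAAA queries for families the resolver believes are usable, unless it configured its own DNS servers as detected by _using_alt_dns().

    #include <netdb.h>
    #include <stdio.h>
    #include <string.h>

    int lookup_example(void)
    {
        struct addrinfo hints, *res;
        memset(&hints, 0, sizeof(hints));
        hints.ai_family   = AF_UNSPEC;      /* consider both A and AAAA */
        hints.ai_socktype = SOCK_STREAM;
        hints.ai_flags    = AI_ADDRCONFIG;  /* skip unconfigured families */

        int rc = getaddrinfo("www.example.com", "80", &hints, &res);
        if (rc != 0) {
            fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
            return -1;
        }
        /* ... connect through the returned list ... */
        freeaddrinfo(res);
        return 0;
    }
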
diff --git a/libc/netbsd/net/getnameinfo.c b/libc/netbsd/net/getnameinfo.c
index d3d0011..313b2bf 100644
--- a/libc/netbsd/net/getnameinfo.c
+++ b/libc/netbsd/net/getnameinfo.c
@@ -187,7 +187,7 @@
 	if (addrStr == NULL) {
 		goto exit;
 	}
-	if (fprintf(proxy, "gethostbyaddr %s %d %d", addrStr, addrLen, addrFamily) < 0) {
+	if (fprintf(proxy, "0 gethostbyaddr %s %d %d", addrStr, addrLen, addrFamily) < 0) {
 		goto exit;
 	}
 
diff --git a/libc/netbsd/resolv/res_cache.c b/libc/netbsd/resolv/res_cache.c
index e88c221..9ae627c 100644
--- a/libc/netbsd/resolv/res_cache.c
+++ b/libc/netbsd/resolv/res_cache.c
@@ -42,6 +42,7 @@
 
 #include <arpa/inet.h>
 #include "resolv_private.h"
+#include "resolv_iface.h"
 
 /* This code implements a small and *simple* DNS resolver cache.
  *
@@ -137,9 +138,19 @@
  *
  * The system property ro.net.dns_cache_size can be used to override the default
  * value with a custom value
+ *
+ *
+ * ******************************************
+ * * NOTE - this has changed.
+ * * 1) we've added IPv6 support so each dns query results in 2 responses
+ * * 2) we've made this a system-wide cache, so the cost is less (it's not
+ * *    duplicated in each process) and the need is greater (more processes
+ * *    making different requests).
+ * * Upping by 2x for IPv6
+ * * Upping by another 5x for the centralized nature
+ * *****************************************
  */
-#define  CONFIG_MAX_ENTRIES    64
-
+#define  CONFIG_MAX_ENTRIES    (64 * 2 * 5)
 /* name of the system property that can be used to set the cache size */
 #define  DNS_CACHE_SIZE_PROP_NAME   "ro.net.dns_cache_size"
 
@@ -1217,6 +1228,16 @@
     int result = -1;
     char cache_size[PROP_VALUE_MAX];
 
+    const char* cache_mode = getenv("ANDROID_DNS_MODE");
+
+    if (cache_mode == NULL || strcmp(cache_mode, "local") != 0) {
+        // Don't use the cache in local mode.  This is used by the
+        // proxy itself.
+        // TODO - change this to 0 when all dns stuff uses proxy (5918973)
+        XLOG("setup cache for non-cache process. size=1");
+        return 1;
+    }
+
     if (__system_property_get(DNS_CACHE_SIZE_PROP_NAME, cache_size) > 0) {
         result = atoi(cache_size);
     }
diff --git a/libc/private/bionic_atomic_arm.h b/libc/private/bionic_atomic_arm.h
new file mode 100644
index 0000000..275c1c9
--- /dev/null
+++ b/libc/private/bionic_atomic_arm.h
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_ARM_H
+#define BIONIC_ATOMIC_ARM_H
+
+#include <machine/cpu-features.h>
+
+/* Some of the hardware instructions used below are not available in Thumb-1
+ * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
+ * problem, we're going to use the same technique as libatomic_ops,
+ * which is to temporarily switch to ARM, do the operation, then switch
+ * back to Thumb-1.
+ *
+ * This results in two 'bx' jumps, just like a normal function call, but
+ * everything is kept inlined, avoiding loading or computing the function's
+ * address, and it prevents a little I-cache thrashing too.
+ *
+ * However, it is highly recommended to avoid compiling any C library source
+ * file that uses these functions in Thumb-1 mode.
+ *
+ * Define three helper macros to implement this:
+ */
+#if defined(__thumb__) && !defined(__thumb2__)
+#  define  __ATOMIC_SWITCH_TO_ARM \
+            "adr r3, 5f\n" \
+            "bx  r3\n" \
+            ".align\n" \
+            ".arm\n" \
+        "5:\n"
+/* note: the leading \n below is intentional */
+#  define __ATOMIC_SWITCH_TO_THUMB \
+            "\n" \
+            "adr r3, 6f\n" \
+            "bx  r3\n" \
+            ".thumb" \
+        "6:\n"
+
+#  define __ATOMIC_CLOBBERS   "r3",  /* list of clobbered registers */
+
+/* Warn the user that ARM mode should really be preferred! */
+#  warning Rebuilding this source file in ARM mode is highly recommended for performance!!
+
+#else
+#  define  __ATOMIC_SWITCH_TO_ARM   /* nothing */
+#  define  __ATOMIC_SWITCH_TO_THUMB /* nothing */
+#  define  __ATOMIC_CLOBBERS        /* nothing */
+#endif
+
+
+/* Define a full memory barrier; this is only needed if we build the
+ * platform for a multi-core device. For the record, using a 'dmb'
+ * instruction on a Nexus One device can take up to 180 ns even if
+ * it is completely unnecessary on this device.
+ *
+ * NOTE: This is where the platform and NDK atomic headers are
+ *        going to diverge. With the NDK, we don't know if the generated
+ *        code is going to run on a single or multi-core device, so we
+ *        need to be cautious.
+ *
+ *        Fortunately, we can use the kernel helper function that is
+ *        mapped at address 0xffff0fa0 in all user processes, and that
+ *        provides a device-specific barrier operation.
+ *
+ *        I.e. on single-core devices, the helper immediately returns,
+ *        on multi-core devices, it uses "dmb" or any other means to
+ *        perform a full-memory barrier.
+ *
+ * There are three cases to consider for the platform:
+ *
+ *    - multi-core ARMv7-A       => use the 'dmb' hardware instruction
+ *    - multi-core ARMv6         => use the coprocessor
+ *    - single core ARMv5TE/6/7  => do not use any hardware barrier
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+
+/* Sanity check, multi-core is only supported starting from ARMv6 */
+#  if __ARM_ARCH__ < 6
+#    error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6
+#  endif
+
+#  ifdef __ARM_HAVE_DMB
+/* For ARMv7-A, we can use the 'dmb' instruction directly */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't
+     * bother with __ATOMIC_SWITCH_TO_ARM */
+    __asm__ __volatile__ ( "dmb" : : : "memory" );
+}
+#  else /* !__ARM_HAVE_DMB */
+/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
+ * which requires the use of a general-purpose register, which is slightly
+ * less efficient.
+ */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __asm__ __volatile__ (
+        __ATOMIC_SWITCH_TO_ARM
+        "mcr p15, 0, %0, c7, c10, 5"
+        __ATOMIC_SWITCH_TO_THUMB
+        : : "r" (0) : __ATOMIC_CLOBBERS "memory");
+}
+#  endif /* !__ARM_HAVE_DMB */
+#else /* !ANDROID_SMP */
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    /* A simple compiler barrier */
+    __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif /* !ANDROID_SMP */
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success, and 1 on failure. The opposite convention is typically
+ * used on other platforms.
+ *
+ * There are two cases to consider:
+ *
+ *     - ARMv6+  => use LDREX/STREX instructions
+ *     - < ARMv6 => use kernel helper function mapped at 0xffff0fc0
+ *
+ * LDREX/STREX are only available starting from ARMv6
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%3]\n"
+            "mov %1, #0\n"
+            "teq %0, %4\n"
+#ifdef __thumb2__
+            "it eq\n"
+#endif
+            "strexeq %1, %5, [%3]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr), "Ir" (old_value), "r" (new_value)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev != old_value;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+
+/* Use the handy kernel helper function mapped at 0xffff0fc0 */
+typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *);
+
+__ATOMIC_INLINE__ int
+__kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    /* Note: the kernel function returns 0 on success too */
+    return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr);
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    return __kernel_cmpxchg(old_value, new_value, ptr);
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
+
+/* Swap operation, without any explicit barriers.
+ * There are again two similar cases to consider:
+ *
+ *   ARMv6+ => use LDREX/STREX
+ *   < ARMv6 => use SWP instead.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%3]\n"
+            "strex %1, %4, [%3]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (status), "+m" (*ptr)
+            : "r" (ptr), "r" (new_value)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else /* !__ARM_HAVE_LDREX_STREX */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    /* NOTE: SWP is ARM-only, so switch out of Thumb-1 around it */
+    __asm__ __volatile__ (
+        __ATOMIC_SWITCH_TO_ARM
+        "swp %0, %2, [%3]"
+        __ATOMIC_SWITCH_TO_THUMB
+        : "=&r" (prev), "+m" (*ptr)
+        : "r" (new_value), "r" (ptr)
+        : __ATOMIC_CLOBBERS "cc");
+    return prev;
+}
+#endif /* !__ARM_HAVE_LDREX_STREX */
+
+/* Atomic increment - without any barriers
+ * This returns the old value
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    int32_t prev, tmp, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%4]\n"
+            "add %1, %0, #1\n"
+            "strex %2, %1, [%4]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    int32_t  prev, status;
+    do {
+        prev = *ptr;
+        status = __kernel_cmpxchg(prev, prev+1, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+/* Atomic decrement - without any barriers
+ * This returns the old value.
+ */
+#ifdef __ARM_HAVE_LDREX_STREX
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    int32_t prev, tmp, status;
+    do {
+        __asm__ __volatile__ (
+            __ATOMIC_SWITCH_TO_ARM
+            "ldrex %0, [%4]\n"
+            "sub %1, %0, #1\n"
+            "strex %2, %1, [%4]"
+            __ATOMIC_SWITCH_TO_THUMB
+            : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
+            : "r" (ptr)
+            : __ATOMIC_CLOBBERS "cc");
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#else
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    int32_t  prev, status;
+    do {
+        prev = *ptr;
+        status = __kernel_cmpxchg(prev, prev-1, ptr);
+    } while (__builtin_expect(status != 0, 0));
+    return prev;
+}
+#endif
+
+#endif /* BIONIC_ATOMIC_ARM_H */
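
To make the 0-on-success convention concrete, here is a hedged sketch (not from this patch) of the retry loop a caller of __bionic_cmpxchg() would typically write; my_atomic_add is a hypothetical helper name, and the header above is assumed to be in scope for int32_t and the function itself.

    /* Atomically add 'delta' to '*ptr' and return the previous value,
     * retrying while __bionic_cmpxchg() reports failure (non-zero). */
    static int32_t my_atomic_add(int32_t delta, volatile int32_t* ptr)
    {
        int32_t old_value;
        do {
            old_value = *ptr;
        } while (__bionic_cmpxchg(old_value, old_value + delta, ptr) != 0);
        return old_value;
    }
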
diff --git a/libc/private/bionic_atomic_gcc_builtin.h b/libc/private/bionic_atomic_gcc_builtin.h
new file mode 100644
index 0000000..e7c5761
--- /dev/null
+++ b/libc/private/bionic_atomic_gcc_builtin.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_GCC_BUILTIN_H
+#define BIONIC_ATOMIC_GCC_BUILTIN_H
+
+/* This header file is used by default if we don't have optimized atomic
+ * routines for a given platform. See bionic_atomic_arm.h and
+ * bionic_atomic_x86.h for examples.
+ */
+
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier(void)
+{
+    __sync_synchronize();
+}
+
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    /* We must return 0 on success */
+    return __sync_bool_compare_and_swap(ptr, old_value, new_value) == 0;
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    do {
+        prev = *ptr;
+        /* retry until *ptr still held 'prev' and was replaced atomically */
+    } while (__sync_val_compare_and_swap(ptr, prev, new_value) != prev);
+    return prev;
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t* ptr)
+{
+    /* We must return the old value */
+    return __sync_fetch_and_add(ptr, 1);
+}
+
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t* ptr)
+{
+    /* We must return the old value */
+    return __sync_fetch_and_add(ptr, -1);
+}
+
+#endif /* BIONIC_ATOMIC_GCC_BUILTIN_H */
diff --git a/libc/private/bionic_atomic_inline.h b/libc/private/bionic_atomic_inline.h
index 95766e1..821ad39 100644
--- a/libc/private/bionic_atomic_inline.h
+++ b/libc/private/bionic_atomic_inline.h
@@ -43,62 +43,21 @@
 extern "C" {
 #endif
 
-/*
- * Define the full memory barrier for an SMP system.  This is
- * platform-specific.
+/* Define __ATOMIC_INLINE__ to control the inlining of all atomic
+ * functions declared here. For a slight performance boost, we want
+ * all of them to be always_inline.
  */
+#define  __ATOMIC_INLINE__  static __inline__ __attribute__((always_inline))
 
 #ifdef __arm__
-#include <machine/cpu-features.h>
-
-/*
- * For ARMv6K we need to issue a specific MCR instead of the DMB, since
- * that wasn't added until v7.  For anything older, SMP isn't relevant.
- * Since we don't have an ARMv6K to test with, we're not going to deal
- * with that now.
- *
- * The DMB instruction is found in the ARM and Thumb2 instruction sets.
- * This will fail on plain 16-bit Thumb.
- */
-#if defined(__ARM_HAVE_DMB)
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
+#  include <bionic_atomic_arm.h>
+#elif defined(__i386__)
+#  include <bionic_atomic_x86.h>
 #else
-# define _ANDROID_MEMBAR_FULL_SMP()  ARM_SMP_defined_but_no_DMB()
+#  include <bionic_atomic_gcc_builtin.h>
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-/*
- * For recent x86, we can use the SSE2 mfence instruction.
- */
-# define _ANDROID_MEMBAR_FULL_SMP() \
-    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
-
-#else
-/*
- * Implementation not defined for this platform.  Hopefully we're building
- * in uniprocessor mode.
- */
-# define _ANDROID_MEMBAR_FULL_SMP()  SMP_barrier_not_defined_for_platform()
-#endif
-
-
-/*
- * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
- * which ensures that the statements appearing above the barrier in the C/C++
- * code will be issued after the statements appearing below the barrier.
- *
- * For SMP this also includes a memory barrier instruction.  On an ARM
- * CPU this means that the current core will flush pending writes, wait
- * for pending reads to complete, and discard any cached reads that could
- * be stale.  Other CPUs may do less, but the end result is equivalent.
- */
-#if ANDROID_SMP != 0
-# define ANDROID_MEMBAR_FULL() _ANDROID_MEMBAR_FULL_SMP()
-#else
-# define ANDROID_MEMBAR_FULL() \
-    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
-#endif
+#define ANDROID_MEMBAR_FULL  __bionic_memory_barrier
 
 #ifdef __cplusplus
 } // extern "C"
diff --git a/libc/private/bionic_atomic_x86.h b/libc/private/bionic_atomic_x86.h
new file mode 100644
index 0000000..aca0c4b
--- /dev/null
+++ b/libc/private/bionic_atomic_x86.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BIONIC_ATOMIC_X86_H
+#define BIONIC_ATOMIC_X86_H
+
+/* Define a full memory barrier; this is only needed if we build the
+ * platform for a multi-core device.
+ */
+#if defined(ANDROID_SMP) && ANDROID_SMP == 1
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier()
+{
+    __asm__ __volatile__ ( "mfence" : : : "memory" );
+}
+#else
+__ATOMIC_INLINE__ void
+__bionic_memory_barrier()
+{
+    /* A simple compiler barrier */
+    __asm__ __volatile__ ( "" : : : "memory" );
+}
+#endif
+
+/* Compare-and-swap, without any explicit barriers. Note that this function
+ * returns 0 on success, and 1 on failure. The opposite convention is typically
+ * used on other platforms.
+ */
+__ATOMIC_INLINE__ int
+__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
+{
+    int32_t prev;
+    __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
+                          : "=a" (prev)
+                          : "q" (new_value), "m" (*ptr), "0" (old_value)
+                          : "memory");
+    return prev != old_value;
+}
+
+
+/* Swap, without any explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_swap(int32_t new_value, volatile int32_t *ptr)
+{
+    __asm__ __volatile__ ("xchgl %1, %0"
+                          : "=r" (new_value)
+                          : "m" (*ptr), "0" (new_value)
+                          : "memory");
+    return new_value;
+}
+
+/* Atomic increment, without explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_inc(volatile int32_t *ptr)
+{
+    int increment = 1;
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+/* Atomic decrement, without explicit barriers */
+__ATOMIC_INLINE__ int32_t
+__bionic_atomic_dec(volatile int32_t *ptr)
+{
+    int increment = -1;
+    __asm__ __volatile__ ("lock; xaddl %0, %1"
+                          : "+r" (increment), "+m" (*ptr)
+                          : : "memory");
+    /* increment now holds the old value of *ptr */
+    return increment;
+}
+
+#endif /* BIONIC_ATOMIC_X86_H */
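
Because xaddl leaves the pre-decrement value in the source register, these functions return the old value, which enables the usual reference-counting idiom. A sketch under assumed names (my_object and my_object_free are hypothetical, not from this patch):

    #include <stdint.h>

    struct my_object {
        volatile int32_t refcount;
        /* ... payload ... */
    };

    void my_object_free(struct my_object* obj);  /* hypothetical */

    void my_object_unref(struct my_object* obj)
    {
        /* __bionic_atomic_dec() returns the value before the decrement,
         * so reading 1 here means we just dropped the last reference. */
        if (__bionic_atomic_dec(&obj->refcount) == 1) {
            __bionic_memory_barrier();  /* order prior writes before free */
            my_object_free(obj);
        }
    }
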
diff --git a/libc/arch-x86/include/sys/atomics.h b/libc/private/bionic_time.h
similarity index 63%
rename from libc/arch-x86/include/sys/atomics.h
rename to libc/private/bionic_time.h
index 7aed3ae..ee82254 100644
--- a/libc/arch-x86/include/sys/atomics.h
+++ b/libc/private/bionic_time.h
@@ -25,41 +25,34 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
-#ifndef _SYS_ATOMICS_H
-#define _SYS_ATOMICS_H
+#ifndef _BIONIC_TIME_H
+#define _BIONIC_TIME_H
 
-#include <sys/cdefs.h>
-#include <sys/time.h>
+#include <time.h>
 
 __BEGIN_DECLS
 
-static inline __attribute__((always_inline)) int
-__atomic_cmpxchg(int old, int _new, volatile int *ptr)
-{
-  return !__sync_bool_compare_and_swap (ptr, old, _new);
-}
+#ifndef _BIONIC_STRFTIME_TZ_DECLARED
+#define _BIONIC_STRFTIME_TZ_DECLARED
 
-static inline __attribute__((always_inline)) int
-__atomic_swap(int _new, volatile int *ptr)
-{
-  return __sync_lock_test_and_set(ptr, _new);
-}
+struct strftime_locale {
+    const char *  mon[12];
+    const char *  month[12];
+    const char *  standalone_month[12];
+    const char *  wday[7];
+    const char *  weekday[7];
+    const char *  X_fmt;
+    const char *  x_fmt;
+    const char *  c_fmt;
+    const char *  am;
+    const char *  pm;
+    const char *  date_fmt;
+};
 
-static inline __attribute__((always_inline)) int
-__atomic_dec(volatile int *ptr)
-{
-  return __sync_fetch_and_sub (ptr, 1);
-}
+extern size_t      strftime_tz(char *s, size_t max, const char *format, const struct tm *tm, const struct strftime_locale*  lc);
 
-static inline __attribute__((always_inline)) int
-__atomic_inc(volatile int *ptr)
-{
-  return __sync_fetch_and_add (ptr, 1);
-}
-
-int __futex_wait(volatile void *ftx, int val, const struct timespec *timeout);
-int __futex_wake(volatile void *ftx, int count);
+#endif /* _BIONIC_STRFTIME_TZ_DECLARED */
 
 __END_DECLS
 
-#endif /* _SYS_ATOMICS_H */
+#endif /* _BIONIC_TIME_H */
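
As a rough illustration of the now-private API (not from this patch): a caller supplies its own strftime_locale table. Only the fields the format string touches are filled in here, which is safe only under that assumption; the include path is also an assumption, since the header lives in libc's private directory.

    #include <stdio.h>
    #include <time.h>
    #include "bionic_time.h"  /* private: strftime_tz, strftime_locale */

    int main(void)
    {
        /* hypothetical partial locale: "%H:%M %p" only reads am/pm */
        struct strftime_locale lc = { .am = "vorm.", .pm = "nachm." };
        char buf[64];
        time_t now = time(NULL);
        struct tm tm_now;
        localtime_r(&now, &tm_now);
        if (strftime_tz(buf, sizeof(buf), "%H:%M %p", &tm_now, &lc) > 0)
            printf("%s\n", buf);
        return 0;
    }
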
diff --git a/libc/private/resolv_iface.h b/libc/private/resolv_iface.h
new file mode 100644
index 0000000..f562687
--- /dev/null
+++ b/libc/private/resolv_iface.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef _RESOLV_IFACE_H
+#define _RESOLV_IFACE_H
+
+/* This header contains declarations related to per-interface DNS
+ * server selection. They are used by system/netd/ and should not be
+ * exposed by the C library's public NDK headers.
+ *
+ * NOTE: <resolv.h> contains the same declarations; these will be removed
+ *        when we change system/netd to use this header instead.
+ */
+#include <sys/cdefs.h>
+#include <netinet/in.h>
+
+__BEGIN_DECLS
+
+/* Use a guard macro until we remove the same definitions from <resolv.h> */
+#ifndef _BIONIC_RESOLV_IFACE_FUNCTIONS_DECLARED
+#define _BIONIC_RESOLV_IFACE_FUNCTIONS_DECLARED
+
+/* Set name of default interface */
+extern void _resolv_set_default_iface(const char* ifname);
+
+/* set name servers for an interface */
+extern void _resolv_set_nameservers_for_iface(const char* ifname, char** servers, int numservers);
+
+/* tell resolver of the address of an interface */
+extern void _resolv_set_addr_of_iface(const char* ifname, struct in_addr* addr);
+
+/* flush the cache associated with the default interface */
+extern void _resolv_flush_cache_for_default_iface(void);
+
+/* flush the cache associated with a certain interface */
+extern void _resolv_flush_cache_for_iface(const char* ifname);
+
+#endif /* _BIONIC_RESOLV_IFACE_FUNCTIONS_DECLARED */
+
+__END_DECLS
+
+#endif /* _RESOLV_IFACE_H */
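
For orientation, a hedged sketch of how a daemon such as netd might drive these hooks when a connection comes up; the interface name and server addresses are placeholders, not values from this patch:

    #include "resolv_iface.h"  /* private bionic header */

    static void on_wifi_connected(void)
    {
        char* servers[] = { "8.8.8.8", "8.8.4.4" };
        _resolv_set_nameservers_for_iface("wlan0", servers, 2);
        _resolv_set_default_iface("wlan0");
        _resolv_flush_cache_for_iface("wlan0");
    }
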
diff --git a/libc/stdio/fileext.h b/libc/stdio/fileext.h
index 2d07043..b36a448 100644
--- a/libc/stdio/fileext.h
+++ b/libc/stdio/fileext.h
@@ -29,24 +29,41 @@
  * $Citrus$
  */
 
+#include <pthread.h>
+#include "wcio.h"
+
 /*
  * file extension
  */
 struct __sfileext {
 	struct	__sbuf _ub; /* ungetc buffer */
 	struct wchar_io_data _wcio;	/* wide char io status */
+	pthread_mutex_t _lock; /* file lock */
 };
 
+#define _FILEEXT_INITIALIZER  {{NULL,0},{0},PTHREAD_RECURSIVE_MUTEX_INITIALIZER}
+
 #define _EXT(fp) ((struct __sfileext *)((fp)->_ext._base))
 #define _UB(fp) _EXT(fp)->_ub
+#define _FLOCK(fp)  _EXT(fp)->_lock
 
 #define _FILEEXT_INIT(fp) \
 do { \
 	_UB(fp)._base = NULL; \
 	_UB(fp)._size = 0; \
 	WCIO_INIT(fp); \
+	_FLOCK_INIT(fp); \
 } while (0)
 
+/* Helper macros to avoid a function call when you know that fp is not NULL.
+ * Notice that we keep _FLOCK_INIT() fast by slightly breaking our pthread
+ * encapsulation.
+ */
+#define _FLOCK_INIT(fp)    _FLOCK(fp).value = __PTHREAD_RECURSIVE_MUTEX_INIT_VALUE
+#define _FLOCK_LOCK(fp)    pthread_mutex_lock(&_FLOCK(fp))
+#define _FLOCK_TRYLOCK(fp) pthread_mutex_trylock(&_FLOCK(fp))
+#define _FLOCK_UNLOCK(fp)  pthread_mutex_unlock(&_FLOCK(fp))
+
 #define _FILEEXT_SETUP(f, fext) \
 do { \
 	(f)->_ext._base = (unsigned char *)(fext); \
diff --git a/libc/stdio/findfp.c b/libc/stdio/findfp.c
index a659c87..76ed5ee 100644
--- a/libc/stdio/findfp.c
+++ b/libc/stdio/findfp.c
@@ -58,7 +58,12 @@
 static struct glue *lastglue = &uglue;
 _THREAD_PRIVATE_MUTEX(__sfp_mutex);
 
-static struct __sfileext __sFext[3];
+static struct __sfileext __sFext[3] = {
+	_FILEEXT_INITIALIZER,
+	_FILEEXT_INITIALIZER,
+	_FILEEXT_INITIALIZER,
+};
+
 FILE __sF[3] = {
 	std(__SRD, STDIN_FILENO),		/* stdin */
 	std(__SWR, STDOUT_FILENO),		/* stdout */
diff --git a/libc/stdio/flockfile.c b/libc/stdio/flockfile.c
index e8c74c5..368fb15 100644
--- a/libc/stdio/flockfile.c
+++ b/libc/stdio/flockfile.c
@@ -31,122 +31,23 @@
  * we can't use the OpenBSD implementation which uses kernel-specific
  * APIs not available on Linux.
  *
- * Ideally, this would be trivially implemented by adding a
- * pthread_mutex_t field to struct __sFILE as defined in
- * <stdio.h>.
- *
- * However, since we don't want to bring pthread into the mix
- * as well as change the size of a public API/ABI structure,
- * we're going to store the data out-of-band.
- *
- * we use a hash-table to map FILE* pointers to recursive mutexes
- * fclose() will call __fremovelock() defined below to remove
- * a pointer from the table.
+ * Instead, we use a pthread_mutex_t within the FILE* internal state.
+ * See fileext.h for details.
  *
  * the behaviour, if fclose() is called while the corresponding
 * file is locked, is totally undefined.
  */
 #include <stdio.h>
-#include <pthread.h>
 #include <string.h>
+#include <errno.h>
+#include "fileext.h"
 
-/* a node in the hash table */
-typedef struct FileLock {
-    struct FileLock*  next;
-    FILE*             file;
-    pthread_mutex_t   mutex;
-} FileLock;
-
-/* use a static hash table. We assume that we're not going to
- * lock a really large number of FILE* objects on an embedded
- * system.
- */
-#define  FILE_LOCK_BUCKETS  32
-
-typedef struct {
-    pthread_mutex_t   lock;
-    FileLock*         buckets[ FILE_LOCK_BUCKETS ];
-} LockTable;
-
-static LockTable*      _lockTable;
-static pthread_once_t  _lockTable_once = PTHREAD_ONCE_INIT;
-
-static void
-lock_table_init( void )
-{
-    _lockTable = malloc(sizeof(*_lockTable));
-    if (_lockTable != NULL) {
-        pthread_mutex_init(&_lockTable->lock, NULL);
-        memset(_lockTable->buckets, 0, sizeof(_lockTable->buckets));
-    }
-}
-
-static LockTable*
-lock_table_lock( void )
-{
-    pthread_once( &_lockTable_once, lock_table_init );
-    pthread_mutex_lock( &_lockTable->lock );
-    return _lockTable;
-}
-
-static void
-lock_table_unlock( LockTable*  t )
-{
-    pthread_mutex_unlock( &t->lock );
-}
-
-static FileLock**
-lock_table_lookup( LockTable*  t, FILE*  f )
-{
-    uint32_t    hash = (uint32_t)(void*)f;
-    FileLock**  pnode;
-
-    hash = (hash >> 2) ^ (hash << 17);
-    pnode = &t->buckets[hash % FILE_LOCK_BUCKETS];
-    for (;;) {
-        FileLock*  node = *pnode;
-        if (node == NULL || node->file == f)
-            break;
-        pnode = &node->next;
-    }
-    return pnode;
-}
 
 void
 flockfile(FILE * fp)
 {
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock == NULL) {
-            pthread_mutexattr_t  attr;
-
-            /* create a new node in the hash table */
-            lock = malloc(sizeof(*lock));
-            if (lock == NULL) {
-                lock_table_unlock(t);
-                return;
-            }
-            lock->next        = NULL;
-            lock->file        = fp;
-
-            pthread_mutexattr_init(&attr);
-            pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
-            pthread_mutex_init( &lock->mutex, &attr );
-
-            *lookup           = lock;
-        }
-        lock_table_unlock(t);
-
-        /* we assume that another thread didn't destroy 'lock'
-        * by calling fclose() on the FILE*. This can happen if
-        * the client is *really* buggy, but we don't care about
-        * such code here.
-        */
-        pthread_mutex_lock(&lock->mutex);
+    if (fp != NULL) {
+        _FLOCK_LOCK(fp);
     }
 }
 
@@ -154,21 +55,13 @@
 int
 ftrylockfile(FILE *fp)
 {
-    int         ret = -1;
-    LockTable*  t   = lock_table_lock();
+    /* The specification for ftrylockfile() says it returns 0 on success,
+     * or non-zero on error. So return an errno code directly on error.
+     */
+    int  ret = EINVAL;
 
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        lock_table_unlock(t);
-
-        /* see above comment about why we assume that 'lock' can
-        * be accessed from here
-        */
-        if (lock != NULL && !pthread_mutex_trylock(&lock->mutex)) {
-            ret = 0;  /* signal success */
-        }
+    if (fp != NULL) {
+        ret = _FLOCK_TRYLOCK(fp);
     }
     return ret;
 }
@@ -176,35 +69,7 @@
 void
 funlockfile(FILE * fp)
 {
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock != NULL)
-            pthread_mutex_unlock(&lock->mutex);
-
-        lock_table_unlock(t);
-    }
-}
-
-
-/* called from fclose() to remove the file lock */
-__LIBC_HIDDEN__ void
-__fremovelock(FILE*  fp)
-{
-    LockTable*  t = lock_table_lock();
-
-    if (t != NULL) {
-        FileLock**  lookup = lock_table_lookup(t, fp);
-        FileLock*   lock   = *lookup;
-
-        if (lock != NULL) {
-            *lookup   = lock->next;
-            lock->file = NULL;
-        }
-        lock_table_unlock(t);
-        free(lock);
+    if (fp != NULL) {
+        _FLOCK_UNLOCK(fp);
     }
 }
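
A short usage sketch (not part of the patch) showing what these functions are for: grouping several stdio writes so that output from other threads cannot be interleaved between them.

    #include <stdio.h>

    static void log_pair(FILE* fp, const char* key, const char* value)
    {
        flockfile(fp);       /* recursive, so nested locking is fine */
        fputs(key, fp);
        fputs("=", fp);
        fputs(value, fp);
        fputs("\n", fp);
        funlockfile(fp);
    }
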
diff --git a/libc/tools/gensyscalls.py b/libc/tools/gensyscalls.py
index bed9445..bfa9fcc 100755
--- a/libc/tools/gensyscalls.py
+++ b/libc/tools/gensyscalls.py
@@ -435,23 +435,6 @@
         self.other_files.append( path )
 
 
-    # now dump the content of linux/_syscalls.h
-    def gen_linux_unistd_h(self):
-        path = "include/sys/linux-unistd.h"
-        D( "generating "+path )
-        fp = create_file( path )
-        fp.write( "/* auto-generated by gensyscalls.py, do not touch */\n" )
-        fp.write( "#ifndef _BIONIC_LINUX_UNISTD_H_\n\n" );
-        fp.write( "#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n" )
-
-        for sc in self.syscalls:
-            fp.write( sc["decl"]+"\n" )
-
-        fp.write( "#ifdef __cplusplus\n}\n#endif\n" )
-        fp.write( "\n#endif /* _BIONIC_LINUX_UNISTD_H_ */\n" );
-        fp.close()
-        self.other_files.append( path )
-
     # now dump the contents of syscalls.mk
     def gen_arch_syscalls_mk(self, arch):
         path = "arch-%s/syscalls.mk" % arch
@@ -528,7 +511,6 @@
         self.gen_linux_syscalls_h()
         for arch in all_archs:
             self.gen_arch_syscalls_mk(arch)
-        self.gen_linux_unistd_h()
         self.gen_syscall_stubs()
 
         D( "comparing files" )
diff --git a/libc/tools/zoneinfo/generate b/libc/tools/zoneinfo/generate
index e48a3c9..3e21d0b 100755
--- a/libc/tools/zoneinfo/generate
+++ b/libc/tools/zoneinfo/generate
@@ -37,6 +37,9 @@
   exit 1
 fi
 
+md5_sum=`md5sum "$latest_archive"`
+echo "MD5: $md5_sum"
+
 echo "Extracting $latest_version..."
 mkdir $latest_version
 tar -C $latest_version -zxf $latest_archive
diff --git a/libc/tzcode/strftime.c b/libc/tzcode/strftime.c
index a2cc3b3..f6e7435 100644
--- a/libc/tzcode/strftime.c
+++ b/libc/tzcode/strftime.c
@@ -38,6 +38,7 @@
 #include "locale.h"
 #include <ctype.h>
 #include <time64.h>
+#include <bionic_time.h>  /* for strftime_tz */
 
 /* struct lc_time_T is now defined as strftime_locale
 * in <bionic_time.h>
diff --git a/libstdc++/src/one_time_construction.cpp b/libstdc++/src/one_time_construction.cpp
index 2a44c79..f3d7138 100644
--- a/libstdc++/src/one_time_construction.cpp
+++ b/libstdc++/src/one_time_construction.cpp
@@ -20,11 +20,11 @@
     // 6 untouched, wait and return 0
     // 1 untouched, return 0
 retry:
-    if (__atomic_cmpxchg(0, 0x2, gv) == 0) {
+    if (__bionic_cmpxchg(0, 0x2, gv) == 0) {
         ANDROID_MEMBAR_FULL();
         return 1;
     }
-    __atomic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter
+    __bionic_cmpxchg(0x2, 0x6, gv); // Indicate there is a waiter
     __futex_wait(gv, 0x6, NULL);
 
     if(*gv != 1) // __cxa_guard_abort was called, let every thread try since there is no return code for this condition
@@ -39,7 +39,7 @@
     // 2 -> 1
     // 6 -> 1, and wake
     ANDROID_MEMBAR_FULL();
-    if (__atomic_cmpxchg(0x2, 0x1, gv) == 0) {
+    if (__bionic_cmpxchg(0x2, 0x1, gv) == 0) {
         return;
     }
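
For orientation (not part of the patch): these guard routines back C++ function-local statics such as `static Foo foo;` inside a function. A rough sketch of the calls the compiler emits around the initializer, with simplified signatures; instance_init_once and the guard word are illustrative names:

    extern int  __cxa_guard_acquire(int volatile* gv);
    extern void __cxa_guard_release(int volatile* gv);

    static int volatile guard;  /* the 'gv' word the code above mutates */

    void instance_init_once(void)
    {
        if (__cxa_guard_acquire(&guard)) { /* non-zero: we must init */
            /* ... construct the object ... */
            __cxa_guard_release(&guard);   /* 2/6 -> 1, wakes waiters */
        }
    }
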
 
diff --git a/linker/Android.mk b/linker/Android.mk
index 803b6c3..8f8cc2b 100644
--- a/linker/Android.mk
+++ b/linker/Android.mk
@@ -10,27 +10,9 @@
 	dlfcn.c \
 	debugger.c
 
-ifeq ($(TARGET_ARCH),sh)
-# SH-4A series virtual address range from 0x00000000 to 0x7FFFFFFF.
-LINKER_TEXT_BASE := 0x70000100
-else
-# This is aligned to 4K page boundary so that both GNU ld and gold work.  Gold
-# actually produces a correct binary with starting address 0xB0000100 but the
-# extra objcopy step to rename symbols causes the resulting binary to be misaligned
-# and unloadable.  Increasing the alignment adds an extra 3840 bytes in padding
-# but switching to gold saves about 1M of space.
-LINKER_TEXT_BASE := 0xB0001000
-endif
+LOCAL_LDFLAGS := -shared
 
-# The maximum size set aside for the linker, from
-# LINKER_TEXT_BASE rounded down to a megabyte.
-LINKER_AREA_SIZE := 0x01000000
-
-LOCAL_LDFLAGS := -Wl,-Ttext,$(LINKER_TEXT_BASE)
-
-LOCAL_CFLAGS += -DPRELINK
-LOCAL_CFLAGS += -DLINKER_TEXT_BASE=$(LINKER_TEXT_BASE)
-LOCAL_CFLAGS += -DLINKER_AREA_SIZE=$(LINKER_AREA_SIZE)
+LOCAL_CFLAGS += -fno-stack-protector
 
 # Set LINKER_DEBUG to either 1 or 0
 #
@@ -69,6 +51,9 @@
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE_SUFFIX := $(TARGET_EXECUTABLE_SUFFIX)
 
+# we don't want crtbegin.o (because we have begin.o), so unset it
+# just for this module
+LOCAL_NO_CRT := true
 
 include $(BUILD_SYSTEM)/dynamic_binary.mk
 
@@ -80,11 +65,3 @@
 #
 # end of BUILD_EXECUTABLE hack
 #
-
-# we don't want crtbegin.o (because we have begin.o), so unset it
-# just for this module
-$(LOCAL_BUILT_MODULE): TARGET_CRTBEGIN_STATIC_O :=
-# This line is not strictly necessary because the dynamic linker is built
-# as a static executable, but it won't hurt if in the future we start
-# building the linker as a dynamic one.
-$(LOCAL_BUILT_MODULE): TARGET_CRTBEGIN_DYNAMIC_O :=
diff --git a/linker/README.TXT b/linker/README.TXT
index a8efe35..f920b97 100644
--- a/linker/README.TXT
+++ b/linker/README.TXT
@@ -87,6 +87,7 @@
       present in executables, not shared libraries, which contains
       a list of functions that need to be called before any other
       initialization function (i.e. DT_INIT and/or DT_INIT_ARRAY)
+      in the executable or any of its libraries.
 
       Note: this is generally stored in a .preinit_array section
 
diff --git a/linker/debugger.c b/linker/debugger.c
index 648dc78..ef8286c 100644
--- a/linker/debugger.c
+++ b/linker/debugger.c
@@ -126,6 +126,7 @@
  */
 void debugger_signal_handler(int n, siginfo_t* info, void* unused)
 {
+    char msgbuf[128];
     unsigned tid;
     int s;
 
@@ -134,7 +135,7 @@
     tid = gettid();
     s = socket_abstract_client("android:debuggerd", SOCK_STREAM);
 
-    if(s >= 0) {
+    if (s >= 0) {
         /* debugger knows our pid from the credentials on the
          * local socket but we need to tell it our tid.  It
          * is paranoid and will verify that we are giving a tid
@@ -147,9 +148,24 @@
             /* if the write failed, there is no point to read on
              * the file descriptor. */
             RETRY_ON_EINTR(ret, read(s, &tid, 1));
+            int savedErrno = errno;
             notify_gdb_of_libraries();
+            errno = savedErrno;
         }
+
+        if (ret < 0) {
+            /* read or write failed -- broken connection? */
+            format_buffer(msgbuf, sizeof(msgbuf),
+                "Failed while talking to debuggerd: %s", strerror(errno));
+            __libc_android_log_write(ANDROID_LOG_FATAL, "libc", msgbuf);
+        }
+
         close(s);
+    } else {
+        /* socket failed; maybe process ran out of fds */
+        format_buffer(msgbuf, sizeof(msgbuf),
+            "Unable to open connection to debuggerd: %s", strerror(errno));
+        __libc_android_log_write(ANDROID_LOG_FATAL, "libc", msgbuf);
     }
 
     /* remove our net so we fault for real when we return */
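
The savedErrno juggling above exists because notify_gdb_of_libraries() can
itself clobber errno, while the new failure report needs the errno left by
the read(). The general pattern, as a standalone sketch with hypothetical
helper names:

    #include <errno.h>
    #include <unistd.h>

    void do_other_work(void);  /* hypothetical: may set errno internally */

    ssize_t read_then_work(int fd, void *buf, size_t len) {
        ssize_t ret = read(fd, buf, len);
        int saved_errno = errno;   /* capture read()'s error code first */
        do_other_work();           /* whatever it sets in errno is noise */
        errno = saved_errno;       /* caller sees the error from read() */
        return ret;
    }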
diff --git a/linker/dlfcn.c b/linker/dlfcn.c
index 5964bd1..529511f 100644
--- a/linker/dlfcn.c
+++ b/linker/dlfcn.c
@@ -60,6 +60,7 @@
     if (unlikely(ret == NULL)) {
         set_dlerror(DL_ERR_CANNOT_LOAD_LIBRARY);
     } else {
+        call_constructors_recursive(ret);
         ret->refcount++;
     }
     pthread_mutex_unlock(&dl_lock);
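
With call_constructors_recursive() invoked here, a library's initializers
now run inside dlopen() rather than being skipped. A hypothetical library
showing the observable behavior:

    /* libexample.c -- hypothetical shared library */
    __attribute__((constructor))
    static void on_load(void) {
        /* runs inside dlopen("libexample.so", ...) before it returns,
         * and at most once, thanks to the constructors_called guard */
    }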
diff --git a/linker/linker.c b/linker/linker.c
index bcfa8dc..5656941 100644
--- a/linker/linker.c
+++ b/linker/linker.c
@@ -313,15 +313,6 @@
     freelist = si;
 }
 
-#ifndef LINKER_TEXT_BASE
-#error "linker's makefile must define LINKER_TEXT_BASE"
-#endif
-#ifndef LINKER_AREA_SIZE
-#error "linker's makefile must define LINKER_AREA_SIZE"
-#endif
-#define LINKER_BASE ((LINKER_TEXT_BASE) & 0xfff00000)
-#define LINKER_TOP  (LINKER_BASE + (LINKER_AREA_SIZE))
-
 const char *addr_to_name(unsigned addr)
 {
     soinfo *si;
@@ -332,10 +323,6 @@
         }
     }
 
-    if((addr >= LINKER_BASE) && (addr < LINKER_TOP)){
-        return "linker";
-    }
-
     return "";
 }
 
@@ -354,12 +341,10 @@
     soinfo *si;
     unsigned addr = (unsigned)pc;
 
-    if ((addr < LINKER_BASE) || (addr >= LINKER_TOP)) {
-        for (si = solist; si != 0; si = si->next){
-            if ((addr >= si->base) && (addr < (si->base + si->size))) {
-                *pcount = si->ARM_exidx_count;
-                return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
-            }
+    for (si = solist; si != 0; si = si->next){
+        if ((addr >= si->base) && (addr < (si->base + si->size))) {
+            *pcount = si->ARM_exidx_count;
+            return (_Unwind_Ptr)(si->base + (unsigned long)si->ARM_exidx);
         }
     }
    *pcount = 0;
@@ -420,6 +405,33 @@
     return NULL;
 }
 
+/*
+ * Essentially the same as _elf_lookup() above, but only
+ * searches for LOCAL symbols.
+ */
+static Elf32_Sym *_elf_lookup_local(soinfo *si, unsigned hash, const char *name)
+{
+    Elf32_Sym *symtab = si->symtab;
+    const char *strtab = si->strtab;
+    unsigned n = hash % si->nbucket;
+
+    TRACE_TYPE(LOOKUP, "%5d LOCAL SEARCH %s in %s@0x%08x %08x %d\n", pid,
+               name, si->name, si->base, hash, hash % si->nbucket);
+    for(n = si->bucket[hash % si->nbucket]; n != 0; n = si->chain[n]){
+        Elf32_Sym *s = symtab + n;
+        if (strcmp(strtab + s->st_name, name)) continue;
+        if (ELF32_ST_BIND(s->st_info) != STB_LOCAL) continue;
+        /* no section == undefined */
+        if(s->st_shndx == 0) continue;
+
+        TRACE_TYPE(LOOKUP, "%5d FOUND LOCAL %s in %s (%08x) %d\n", pid,
+                   name, si->name, s->st_value, s->st_size);
+        return s;
+    }
+
+    return NULL;
+}
+
 static unsigned elfhash(const char *_name)
 {
     const unsigned char *name = (const unsigned char *) _name;
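
Both lookup routines walk the chain selected by hash % nbucket, where the
hash comes from the classic System V ELF hash that elfhash() computes. For
reference, the well-known algorithm as a sketch:

    static unsigned sysv_elf_hash(const char *name) {
        const unsigned char *p = (const unsigned char *) name;
        unsigned h = 0, g;
        while (*p) {
            h = (h << 4) + *p++;   /* shift in the next character */
            g = h & 0xf0000000;    /* bits about to overflow the top */
            if (g) h ^= g >> 24;   /* fold them back into the low bits */
            h &= ~g;               /* and clear them from the top */
        }
        return h;
    }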
@@ -443,7 +455,17 @@
     soinfo *lsi = si;
     int i;
 
-    /* Look for symbols in the local scope first (the object who is
+    /* If we are trying to find a symbol for the linker itself, look
+     * for LOCAL symbols first. Avoid using LOCAL symbols for other
+     * shared libraries until we have a better understanding of what
+     * might break by doing so. */
+    if (si->flags & FLAG_LINKER) {
+        s = _elf_lookup_local(si, elf_hash, name);
+        if(s != NULL)
+            goto done;
+    }
+
+    /* Look for symbols in the local scope (the object that is
      * searching). This happens with C++ templates on i386 for some
      * reason.
      *
@@ -452,6 +474,7 @@
      * dynamic linking.  Some systems return the first definition found
      * and some the first non-weak definition.   This is system dependent.
      * Here we return the first definition found for simplicity.  */
+
     s = _elf_lookup(si, elf_hash, name);
     if(s != NULL)
         goto done;
@@ -1499,8 +1522,24 @@
     }
 }
 
-static void call_constructors(soinfo *si)
+void call_constructors_recursive(soinfo *si)
 {
+    if (si->constructors_called)
+        return;
+
+    // Set this before actually calling the constructors, otherwise it doesn't
+    // protect against recursive constructor calls. One simple example of
+    // constructor recursion is the libc debug malloc, which is implemented in
+    // libc_malloc_debug_leak.so:
+    // 1. The program depends on libc, so libc's constructor is called here.
+    // 2. The libc constructor calls dlopen() to load libc_malloc_debug_leak.so.
+    // 3. dlopen() calls call_constructors_recursive() with the newly created
+    //    soinfo for libc_malloc_debug_leak.so.
+    // 4. The debug so depends on libc, so call_constructors_recursive() is
+    //    called again with the libc soinfo. If it doesn't trigger the early-
+    //    out above, the libc constructor will be called again (recursively!).
+    si->constructors_called = 1;
+
     if (si->flags & FLAG_EXE) {
         TRACE("[ %5d Calling preinit_array @ 0x%08x [%d] for '%s' ]\n",
               pid, (unsigned)si->preinit_array, si->preinit_array_count,
@@ -1515,6 +1554,21 @@
         }
     }
 
+    if (si->dynamic) {
+        unsigned *d;
+        for(d = si->dynamic; *d; d += 2) {
+            if(d[0] == DT_NEEDED){
+                soinfo* lsi = (soinfo *)d[1];
+                if (!validate_soinfo(lsi)) {
+                    DL_ERR("%5d bad DT_NEEDED pointer in %s",
+                           pid, si->name);
+                } else {
+                    call_constructors_recursive(lsi);
+                }
+            }
+        }
+    }
+
     if (si->init_func) {
         TRACE("[ %5d Calling init_func @ 0x%08x for '%s' ]\n", pid,
               (unsigned)si->init_func, si->name);
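
Setting constructors_called before walking DT_NEEDED is what makes the
libc / malloc-debug cycle described in the comment terminate. Reduced to
its essence (hypothetical node type, not the real soinfo):

    struct node {
        int visited;
        int ndeps;
        struct node **deps;
        void (*init)(void);
    };

    static void init_recursive(struct node *n) {
        if (n->visited)
            return;
        n->visited = 1;  /* mark BEFORE recursing so a dependency cycle
                          * (A needs B, B needs A) ends instead of
                          * recursing forever */
        for (int i = 0; i < n->ndeps; i++)
            init_recursive(n->deps[i]);  /* dependencies initialize first */
        if (n->init)
            n->init();
    }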
@@ -1528,8 +1582,8 @@
         call_array(si->init_array, si->init_array_count, 0);
         TRACE("[ %5d Done calling init_array for '%s' ]\n", pid, si->name);
     }
-}
 
+}
 
 static void call_destructors(soinfo *si)
 {
@@ -1628,10 +1682,10 @@
     DEBUG("%5d si->base = 0x%08x si->flags = 0x%08x\n", pid,
           si->base, si->flags);
 
-    if (si->flags & FLAG_EXE) {
+    if (si->flags & (FLAG_EXE | FLAG_LINKER)) {
         /* Locate the needed program segments (DYNAMIC/ARM_EXIDX) for
-         * linkage info if this is the executable. If this was a
-         * dynamic lib, that would have been done at load time.
+         * linkage info if this is the executable or the linker itself.
+         * If this was a dynamic lib, that would have been done at load time.
          *
          * TODO: It's unfortunate that small pieces of this are
          * repeated from the load_library routine. Refactor this just
@@ -1650,16 +1704,17 @@
             if (phdr->p_type == PT_LOAD) {
                 /* For the executable, we use the si->size field only in
                    dl_unwind_find_exidx(), so the meaning of si->size
-                   is not the size of the executable; it is the last
-                   virtual address of the loadable part of the executable;
-                   since si->base == 0 for an executable, we use the
-                   range [0, si->size) to determine whether a PC value
-                   falls within the executable section.  Of course, if
-                   a value is below phdr->p_vaddr, it's not in the
-                   executable section, but a) we shouldn't be asking for
-                   such a value anyway, and b) if we have to provide
-                   an EXIDX for such a value, then the executable's
-                   EXIDX is probably the better choice.
+                   is not the size of the executable; it is the distance
+                   between the load location of the executable and the last
+                   address of the loadable part of the executable.
+                   We use the range [si->base, si->base + si->size) to
+                   determine whether a PC value falls within the executable
+                   section. Of course, if a value is between si->base and
+                   (si->base + phdr->p_vaddr), it's not in the executable
+                   section, but a) we shouldn't be asking for such a value
+                   anyway, and b) if we have to provide an EXIDX for such a
+                   value, then the executable's EXIDX is probably the better
+                   choice.
                 */
                 DEBUG_DUMP_PHDR(phdr, "PT_LOAD", pid);
                 if (phdr->p_vaddr + phdr->p_memsz > si->size)
@@ -1669,12 +1724,20 @@
                 if (!(phdr->p_flags & PF_W)) {
                     unsigned _end;
 
-                    if (phdr->p_vaddr < si->wrprotect_start)
-                        si->wrprotect_start = phdr->p_vaddr;
-                    _end = (((phdr->p_vaddr + phdr->p_memsz + PAGE_SIZE - 1) &
+                    if (si->base + phdr->p_vaddr < si->wrprotect_start)
+                        si->wrprotect_start = si->base + phdr->p_vaddr;
+                    _end = (((si->base + phdr->p_vaddr + phdr->p_memsz + PAGE_SIZE - 1) &
                              (~PAGE_MASK)));
                     if (_end > si->wrprotect_end)
                         si->wrprotect_end = _end;
+                    /* Make the section writable just in case we'll have to
+                     * write to it during relocation (i.e. text segment).
+                     * However, we will remember what range of addresses
+                     * should be write protected.
+                     */
+                    mprotect((void *) (si->base + phdr->p_vaddr),
+                             phdr->p_memsz,
+                             PFLAGS_TO_PROT(phdr->p_flags) | PROT_WRITE);
                 }
             } else if (phdr->p_type == PT_DYNAMIC) {
                 if (si->dynamic != (unsigned *)-1) {
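
The mprotect() call above opens read-only segments for writing so that
text relocations can be applied; the recorded wrprotect range is used to
seal them again afterwards. The dance in isolation (a sketch; assumes
start and len are page aligned, as mprotect() requires):

    #include <sys/mman.h>

    static int patch_text_segment(void *start, size_t len) {
        /* 1. make the segment writable for relocation */
        if (mprotect(start, len, PROT_READ | PROT_WRITE | PROT_EXEC) < 0)
            return -1;
        /* 2. ... apply text relocations here ... */
        /* 3. restore the intended read/execute protections */
        return mprotect(start, len, PROT_READ | PROT_EXEC);
    }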
@@ -1884,7 +1947,6 @@
     if (program_is_setuid)
         nullify_closed_stdio ();
     notify_gdb_of_load(si);
-    call_constructors(si);
     return 0;
 
 fail:
@@ -1940,16 +2002,16 @@
     }
 }
 
-int main(int argc, char **argv)
-{
-    return 0;
-}
-
 #define ANDROID_TLS_SLOTS  BIONIC_TLS_SLOTS
 
 static void * __tls_area[ANDROID_TLS_SLOTS];
 
-unsigned __linker_init(unsigned **elfdata)
+/*
+ * This code is called after the linker has linked itself and
+ * fixed its own GOT. It is safe to make references to externs
+ * and other non-local data at this point.
+ */
+static unsigned __linker_init_post_relocation(unsigned **elfdata)
 {
     static soinfo linker_soinfo;
 
@@ -2069,7 +2131,18 @@
         vecs += 2;
     }
 
+    /* Compute the value of si->base. We can't rely on the fact that
+     * the first entry is the PHDR because this will not be true
+     * for certain executables (e.g. some in the NDK unit test suite)
+     */
+    int nn;
     si->base = 0;
+    for ( nn = 0; nn < si->phnum; nn++ ) {
+        if (si->phdr[nn].p_type == PT_PHDR) {
+            si->base = (Elf32_Addr) si->phdr - si->phdr[nn].p_vaddr;
+            break;
+        }
+    }
     si->dynamic = (unsigned *)-1;
     si->wrprotect_start = 0xffffffff;
     si->wrprotect_end = 0;
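
The PT_PHDR scan above computes the load bias: where the program headers
actually sit in memory minus where the ELF file says they should sit. As a
standalone sketch:

    #include <elf.h>

    static Elf32_Addr load_bias(const Elf32_Phdr *phdr, int phnum) {
        int i;
        for (i = 0; i < phnum; i++)
            if (phdr[i].p_type == PT_PHDR)
                return (Elf32_Addr) phdr - phdr[i].p_vaddr;
        return 0;  /* no PT_PHDR: loaded at its link address, bias 0 */
    }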
@@ -2090,6 +2163,8 @@
         exit(-1);
     }
 
+    call_constructors_recursive(si);
+
 #if ALLOW_SYMBOLS_FROM_MAIN
     /* Set somain after we've loaded all the libraries in order to prevent
      * linking of symbols back to the main image, which is not set up at that
@@ -2138,3 +2213,69 @@
           si->entry);
     return si->entry;
 }
+
+/*
+ * Find the value of AT_BASE passed to us by the kernel. This is the load
+ * location of the linker.
+ */
+static unsigned find_linker_base(unsigned **elfdata) {
+    int argc = (int) *elfdata;
+    char **argv = (char**) (elfdata + 1);
+    unsigned *vecs = (unsigned*) (argv + argc + 1);
+    while (vecs[0] != 0) {
+        vecs++;
+    }
+
+    /* Step over the NULL pointer that terminates the environment block */
+    vecs++;
+
+    while(vecs[0]) {
+        if (vecs[0] == AT_BASE) {
+            return vecs[1];
+        }
+        vecs += 2;
+    }
+
+    return 0; // should never happen
+}
+
+/*
+ * This is the entry point for the linker, called from begin.S. This
+ * function is responsible for fixing the linker's own relocations, and
+ * then calling __linker_init_post_relocation().
+ *
+ * Because this function is called before the linker has fixed its own
+ * relocations, any attempt to reference an extern variable, extern
+ * function, or other GOT reference will generate a segfault.
+ */
+unsigned __linker_init(unsigned **elfdata) {
+    unsigned linker_addr = find_linker_base(elfdata);
+    Elf32_Ehdr *elf_hdr = (Elf32_Ehdr *) linker_addr;
+    Elf32_Phdr *phdr =
+        (Elf32_Phdr *)((unsigned char *) linker_addr + elf_hdr->e_phoff);
+
+    soinfo linker_so;
+    memset(&linker_so, 0, sizeof(soinfo));
+
+    linker_so.base = linker_addr;
+    linker_so.dynamic = (unsigned *) -1;
+    linker_so.phdr = phdr;
+    linker_so.phnum = elf_hdr->e_phnum;
+    linker_so.flags |= FLAG_LINKER;
+    linker_so.wrprotect_start = 0xffffffff;
+    linker_so.wrprotect_end = 0;
+
+    if (link_image(&linker_so, 0)) {
+        // It would be nice to print an error message, but if the linker
+        // can't link itself, there's no guarantee that we'll be able to
+        // call write() (because it involves a GOT reference).
+        //
+        // This situation should never occur unless the linker itself
+        // is corrupt.
+        exit(-1);
+    }
+
+    // We have successfully fixed our own relocations. It's safe to run
+    // the main part of the linker now.
+    return __linker_init_post_relocation(elfdata);
+}
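
find_linker_base() relies on the kernel's initial stack layout: argc, the
argv pointers, a NULL, the environment pointers, a NULL, and then the ELF
auxiliary vector. The same walk, expressed with the <elf.h> auxv type as a
sketch:

    #include <elf.h>

    static unsigned find_auxv_value(unsigned **elfdata, unsigned type) {
        int argc = (int) *elfdata;
        char **envp = (char **)(elfdata + 1) + argc + 1; /* skip argv + NULL */
        while (*envp)
            envp++;                                      /* skip environment */
        Elf32_auxv_t *av = (Elf32_auxv_t *)(envp + 1);   /* past env's NULL */
        for (; av->a_type != AT_NULL; av++)
            if (av->a_type == type)                      /* e.g. AT_BASE */
                return av->a_un.a_val;
        return 0;
    }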
diff --git a/linker/linker.h b/linker/linker.h
index d29484c..eb3c4c3 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -83,6 +83,7 @@
 #define FLAG_LINKED     0x00000001
 #define FLAG_ERROR      0x00000002
 #define FLAG_EXE        0x00000004 // The main executable
+#define FLAG_LINKER     0x00000010 // The linker itself
 
 #define SOINFO_NAME_LEN 128
 
@@ -140,6 +141,8 @@
 
     unsigned refcount;
     struct link_map linkmap;
+
+    int constructors_called;
 };
 
 
@@ -200,6 +203,7 @@
 soinfo *find_containing_library(const void *addr);
 Elf32_Sym *find_containing_symbol(const void *addr, soinfo *si);
 const char *linker_get_error(void);
+void call_constructors_recursive(soinfo *si);
 
 #ifdef ANDROID_ARM_LINKER 
 typedef long unsigned int *_Unwind_Ptr;