Support for jemalloc to replace dlmalloc.

To use jemalloc, add MALLOC_IMPL = jemalloc in a board config file
and you get the new version automatically.

Update the pthread_key_create tests since jemalloc uses a few keys.
Add a new test to verify memalign works as expected.

Bug: 981363

Change-Id: I16eb152b291a95bd2499e90492fc6b4bd7053836
diff --git a/libc/Android.mk b/libc/Android.mk
index 53a122e..543b4b4 100644
--- a/libc/Android.mk
+++ b/libc/Android.mk
@@ -498,6 +498,16 @@
   libc_common_cflags += -DDEBUG
 endif
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+  libc_common_cflags += -DUSE_JEMALLOC
+
+  libc_malloc_src := bionic/jemalloc.cpp
+else
+  libc_common_cflags += -DUSE_DLMALLOC
+
+  libc_malloc_src := bionic/dlmalloc.cpp
+endif
+
 # To customize dlmalloc's alignment, set BOARD_MALLOC_ALIGNMENT in
 # the appropriate BoardConfig.mk file.
 #
@@ -526,6 +536,10 @@
     $(LOCAL_PATH)/stdlib  \
     $(LOCAL_PATH)/stdio   \
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+  libc_common_c_includes += external/jemalloc/include
+endif
+
 # ========================================================
 # Add in the arch-specific flags.
 # Must be called with $(eval).
@@ -812,6 +826,11 @@
     libc_syscalls \
     libc_tzcode \
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+LOCAL_WHOLE_STATIC_LIBRARIES += \
+    libjemalloc
+endif
+
 LOCAL_SYSTEM_SHARED_LIBRARIES :=
 
 # TODO: split out the asflags.
@@ -866,7 +885,7 @@
 LOCAL_SRC_FILES := \
     $(libc_arch_static_src_files) \
     $(libc_static_common_src_files) \
-    bionic/dlmalloc.c \
+    $(libc_malloc_src) \
     bionic/malloc_debug_common.cpp \
     bionic/libc_init_static.cpp \
 
@@ -896,11 +915,10 @@
 LOCAL_CONLYFLAGS := $(libc_common_conlyflags)
 LOCAL_CPPFLAGS := $(libc_common_cppflags)
 LOCAL_C_INCLUDES := $(libc_common_c_includes)
-
 LOCAL_SRC_FILES := \
     $(libc_arch_dynamic_src_files) \
     $(libc_static_common_src_files) \
-    bionic/dlmalloc.c \
+    $(libc_malloc_src) \
     bionic/malloc_debug_common.cpp \
     bionic/debug_mapinfo.cpp \
     bionic/debug_stacktrace.cpp \
diff --git a/libc/bionic/debug_mapinfo.cpp b/libc/bionic/debug_mapinfo.cpp
index c5b9aa7..e81ea54 100644
--- a/libc/bionic/debug_mapinfo.cpp
+++ b/libc/bionic/debug_mapinfo.cpp
@@ -30,7 +30,13 @@
 #include <string.h>
 #include <stdlib.h>
 
+#ifdef USE_JEMALLOC
+#include "jemalloc.h"
+#define Malloc(function)  je_ ## function
+#else
 #include "dlmalloc.h"
+#define Malloc(function)  dl ## function
+#endif
 #include "debug_mapinfo.h"
 
 // 6f000000-6f01e000 rwxp 00000000 00:0c 16389419   /system/lib/libcomposer.so
@@ -46,7 +52,7 @@
   if (len < 50) return 0;
   if (line[20] != 'x') return 0;
 
-  mapinfo_t* mi = static_cast<mapinfo_t*>(dlmalloc(sizeof(mapinfo_t) + (len - 47)));
+  mapinfo_t* mi = static_cast<mapinfo_t*>(Malloc(malloc)(sizeof(mapinfo_t) + (len - 47)));
   if (mi == 0) return 0;
 
   mi->start = strtoul(line, 0, 16);
@@ -79,7 +85,7 @@
   while (mi != NULL) {
     mapinfo_t* del = mi;
     mi = mi->next;
-    dlfree(del);
+    Malloc(free)(del);
   }
 }
 
diff --git a/libc/bionic/jemalloc.cpp b/libc/bionic/jemalloc.cpp
new file mode 100644
index 0000000..625d789
--- /dev/null
+++ b/libc/bionic/jemalloc.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <unistd.h>
+
+#include "jemalloc.h"
+
+// Allocates |bytes| rounded up to a multiple of the page size, using the page
+// size as the alignment boundary. Returns NULL if the rounded size overflows.
+void* je_pvalloc(size_t bytes) {
+  size_t pagesize = sysconf(_SC_PAGESIZE);
+  size_t size = (bytes + pagesize - 1) & ~(pagesize - 1);
+  if (size < bytes) {
+    // Rounding up wrapped around; the request cannot be satisfied.
+    return NULL;
+  }
+  // je_memalign still expands to je_memalign_round_up_boundary at this point.
+  return je_memalign(pagesize, size);
+}
+
+#ifdef je_memalign
+#undef je_memalign
+#endif
+
+// The man page for memalign says it fails if boundary is not a power of 2,
+// but this is not true. Both glibc and dlmalloc round up to the next power
+// of 2, so we'll do the same.
+//
+// The rounding is done in size_t so that boundaries wider than unsigned int
+// are not truncated. A boundary of 0 is treated as 1. Returns NULL if the
+// next power of 2 is not representable in size_t.
+void* je_memalign_round_up_boundary(size_t boundary, size_t size) {
+  size_t power_of_2 = 1;
+  // Unsigned shifts wrap to 0 once the top bit is shifted out, which marks
+  // a boundary that is too large to round up.
+  while (power_of_2 != 0 && power_of_2 < boundary) {
+    power_of_2 <<= 1;
+  }
+  if (power_of_2 == 0) {
+    return NULL;
+  }
+  return je_memalign(power_of_2, size);
+}
diff --git a/libc/bionic/jemalloc.h b/libc/bionic/jemalloc.h
new file mode 100644
index 0000000..feb1f43
--- /dev/null
+++ b/libc/bionic/jemalloc.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBC_BIONIC_JEMALLOC_H_
+#define LIBC_BIONIC_JEMALLOC_H_
+
+#include <jemalloc/jemalloc.h>
+
+// Need to wrap memalign since je_memalign fails on non-power of 2 alignments.
+#define je_memalign je_memalign_round_up_boundary
+
+__BEGIN_DECLS
+
+struct mallinfo je_mallinfo();
+void* je_memalign_round_up_boundary(size_t, size_t);
+void* je_pvalloc(size_t);
+
+__END_DECLS
+
+#endif  // LIBC_BIONIC_JEMALLOC_H_
diff --git a/libc/bionic/malloc_debug_check.cpp b/libc/bionic/malloc_debug_check.cpp
index 11578a3..2590ce7 100644
--- a/libc/bionic/malloc_debug_check.cpp
+++ b/libc/bionic/malloc_debug_check.cpp
@@ -47,7 +47,6 @@
 
 #include "debug_mapinfo.h"
 #include "debug_stacktrace.h"
-#include "dlmalloc.h"
 #include "private/libc_logging.h"
 #include "malloc_debug_common.h"
 #include "private/ScopedPthreadMutexLocker.h"
@@ -74,7 +73,7 @@
 
 struct hdr_t {
     uint32_t tag;
-    void* base;  // Always points to the memory allocated using dlmalloc.
+    void* base;  // Always points to the memory allocated using malloc.
                  // For memory allocated in chk_memalign, this value will
                  // not be the same as the location of the start of this
                  // structure.
@@ -321,14 +320,14 @@
     while (backlog_num > g_malloc_debug_backlog) {
         hdr_t* gone = backlog_tail;
         del_from_backlog_locked(gone);
-        dlfree(gone->base);
+        Malloc(free)(gone->base);
     }
 }
 
 extern "C" void* chk_malloc(size_t size) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
 
-    hdr_t* hdr = static_cast<hdr_t*>(dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t)));
+    hdr_t* hdr = static_cast<hdr_t*>(Malloc(malloc)(sizeof(hdr_t) + size + sizeof(ftr_t)));
     if (hdr) {
         hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
@@ -356,7 +355,7 @@
         return NULL;
     }
 
-    void* base = dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t));
+    void* base = Malloc(malloc)(sizeof(hdr_t) + size + sizeof(ftr_t));
     if (base != NULL) {
         // Check that the actual pointer that will be returned is aligned
         // properly.
@@ -453,25 +452,25 @@
                        user(hdr), size);
             log_backtrace(bt, depth);
             // just get a whole new allocation and leak the old one
-            return dlrealloc(0, size);
-            // return dlrealloc(user(hdr), size); // assuming it was allocated externally
+            return Malloc(realloc)(0, size);
+            // return realloc(user(hdr), size); // assuming it was allocated externally
         }
     }
 
     if (hdr->base != hdr) {
         // An allocation from memalign, so create another allocation and
         // copy the data out.
-        void* newMem = dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t));
+        void* newMem = Malloc(malloc)(sizeof(hdr_t) + size + sizeof(ftr_t));
         if (newMem) {
             memcpy(newMem, hdr, sizeof(hdr_t) + hdr->size);
-            dlfree(hdr->base);
+            Malloc(free)(hdr->base);
             hdr = static_cast<hdr_t*>(newMem);
         } else {
-            dlfree(hdr->base);
+            Malloc(free)(hdr->base);
             hdr = NULL;
         }
     } else {
-        hdr = static_cast<hdr_t*>(dlrealloc(hdr, sizeof(hdr_t) + size + sizeof(ftr_t)));
+        hdr = static_cast<hdr_t*>(Malloc(realloc)(hdr, sizeof(hdr_t) + size + sizeof(ftr_t)));
     }
     if (hdr) {
         hdr->base = hdr;
@@ -486,7 +485,7 @@
 extern "C" void* chk_calloc(int nmemb, size_t size) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
     size_t total_size = nmemb * size;
-    hdr_t* hdr = static_cast<hdr_t*>(dlcalloc(1, sizeof(hdr_t) + total_size + sizeof(ftr_t)));
+    hdr_t* hdr = static_cast<hdr_t*>(Malloc(calloc)(1, sizeof(hdr_t) + total_size + sizeof(ftr_t)));
     if (hdr) {
         hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
@@ -497,7 +496,7 @@
 }
 
 extern "C" size_t chk_malloc_usable_size(const void* ptr) {
-    // dlmalloc_usable_size returns 0 for NULL and unknown blocks.
+    // malloc_usable_size returns 0 for NULL and unknown blocks.
     if (ptr == NULL)
         return 0;
 
diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp
index 8ae0bb5..db3f995 100644
--- a/libc/bionic/malloc_debug_common.cpp
+++ b/libc/bionic/malloc_debug_common.cpp
@@ -46,7 +46,6 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-#include "dlmalloc.h"
 #include "private/ScopedPthreadMutexLocker.h"
 
 /*
@@ -134,7 +133,7 @@
         return;
     }
 
-    HashEntry** list = static_cast<HashEntry**>(dlmalloc(sizeof(void*) * g_hash_table.count));
+    HashEntry** list = static_cast<HashEntry**>(Malloc(malloc)(sizeof(void*) * g_hash_table.count));
 
     // get the entries into an array to be sorted
     int index = 0;
@@ -155,11 +154,11 @@
     *backtraceSize = BACKTRACE_SIZE;
 
     // now get a byte array big enough for this
-    *info = static_cast<uint8_t*>(dlmalloc(*overallSize));
+    *info = static_cast<uint8_t*>(Malloc(malloc)(*overallSize));
 
     if (*info == NULL) {
         *overallSize = 0;
-        dlfree(list);
+        Malloc(free)(list);
         return;
     }
 
@@ -181,42 +180,36 @@
         head += *infoSize;
     }
 
-    dlfree(list);
+    Malloc(free)(list);
 }
 
 // Exported for use by ddms.
 extern "C" void free_malloc_leak_info(uint8_t* info) {
-    dlfree(info);
+    Malloc(free)(info);
 }
 
 extern "C" struct mallinfo mallinfo() {
-    return dlmallinfo();
+    return Malloc(mallinfo)();
 }
 
 extern "C" void* valloc(size_t bytes) {
-    return dlvalloc(bytes);
+    return Malloc(valloc)(bytes);
 }
 
 extern "C" void* pvalloc(size_t bytes) {
-    return dlpvalloc(bytes);
+    return Malloc(pvalloc)(bytes);
 }
 
 extern "C" int posix_memalign(void** memptr, size_t alignment, size_t size) {
-    return dlposix_memalign(memptr, alignment, size);
+    return Malloc(posix_memalign)(memptr, alignment, size);
 }
 
-/* Support for malloc debugging.
- * Note that if USE_DL_PREFIX is not defined, it's assumed that memory
- * allocation routines are implemented somewhere else, so all our custom
- * malloc routines should not be compiled at all.
- */
-#ifdef USE_DL_PREFIX
-
-/* Table for dispatching malloc calls, initialized with default dispatchers. */
+// Support for malloc debugging.
+// Table for dispatching malloc calls, initialized with default dispatchers.
 extern const MallocDebug __libc_malloc_default_dispatch;
 const MallocDebug __libc_malloc_default_dispatch __attribute__((aligned(32))) =
 {
-    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign, dlmalloc_usable_size,
+    Malloc(malloc), Malloc(free), Malloc(calloc), Malloc(realloc), Malloc(memalign), Malloc(malloc_usable_size),
 };
 
 /* Selector of dispatch table to use for dispatching malloc calls. */
@@ -257,7 +250,7 @@
 
 /* Table for dispatching malloc calls, depending on environment. */
 static MallocDebug g_malloc_dispatch_table __attribute__((aligned(32))) = {
-    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign, dlmalloc_usable_size
+    Malloc(malloc), Malloc(free), Malloc(calloc), Malloc(realloc), Malloc(memalign), Malloc(malloc_usable_size)
 };
 
 extern const char* __progname;
@@ -347,8 +340,7 @@
         g_malloc_debug_level = atoi(env);
     }
 
-    /* Debug level 0 means that we should use dlxxx allocation
-     * routines (default). */
+    /* Debug level 0 means that we should use default allocation routines. */
     if (g_malloc_debug_level == 0) {
         return;
     }
@@ -504,7 +496,6 @@
 static pthread_once_t  malloc_fini_once_ctl = PTHREAD_ONCE_INIT;
 
 #endif  // !LIBC_STATIC
-#endif  // USE_DL_PREFIX
 
 /* Initializes memory allocation framework.
  * This routine is called from __libc_init routines implemented
diff --git a/libc/bionic/malloc_debug_common.h b/libc/bionic/malloc_debug_common.h
index 28be042..c1c3c89 100644
--- a/libc/bionic/malloc_debug_common.h
+++ b/libc/bionic/malloc_debug_common.h
@@ -45,11 +45,22 @@
 
 #define MAX_SIZE_T           (~(size_t)0)
 
-// This must match the alignment used by dlmalloc.
+// This must match the alignment used by the malloc implementation.
 #ifndef MALLOC_ALIGNMENT
 #define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
 #endif
 
+#ifdef USE_JEMALLOC
+#include "jemalloc.h"
+#define Malloc(function)  je_ ## function
+#else
+#ifndef USE_DLMALLOC
+#error "Either one of USE_DLMALLOC or USE_JEMALLOC must be defined."
+#endif
+#include "dlmalloc.h"
+#define Malloc(function)  dl ## function
+#endif
+
 // =============================================================================
 // Structures
 // =============================================================================
diff --git a/libc/bionic/malloc_debug_leak.cpp b/libc/bionic/malloc_debug_leak.cpp
index 146cddc..035765f 100644
--- a/libc/bionic/malloc_debug_leak.cpp
+++ b/libc/bionic/malloc_debug_leak.cpp
@@ -46,7 +46,6 @@
 #include <unwind.h>
 
 #include "debug_stacktrace.h"
-#include "dlmalloc.h"
 #include "malloc_debug_common.h"
 
 #include "private/libc_logging.h"
@@ -144,7 +143,7 @@
         entry->allocations++;
     } else {
         // create a new entry
-        entry = static_cast<HashEntry*>(dlmalloc(sizeof(HashEntry) + numEntries*sizeof(uintptr_t)));
+        entry = static_cast<HashEntry*>(Malloc(malloc)(sizeof(HashEntry) + numEntries*sizeof(uintptr_t)));
         if (!entry) {
             return NULL;
         }
@@ -213,11 +212,11 @@
 #define CHK_SENTINEL_VALUE      0xeb
 
 extern "C" void* fill_calloc(size_t n_elements, size_t elem_size) {
-    return dlcalloc(n_elements, elem_size);
+    return Malloc(calloc)(n_elements, elem_size);
 }
 
 extern "C" void* fill_malloc(size_t bytes) {
-    void* buffer = dlmalloc(bytes);
+    void* buffer = Malloc(malloc)(bytes);
     if (buffer) {
         memset(buffer, CHK_SENTINEL_VALUE, bytes);
     }
@@ -225,17 +224,17 @@
 }
 
 extern "C" void fill_free(void* mem) {
-    size_t bytes = dlmalloc_usable_size(mem);
+    size_t bytes = Malloc(malloc_usable_size)(mem);
     memset(mem, CHK_FILL_FREE, bytes);
-    dlfree(mem);
+    Malloc(free)(mem);
 }
 
 extern "C" void* fill_realloc(void* mem, size_t bytes) {
-    size_t oldSize = dlmalloc_usable_size(mem);
-    void* newMem = dlrealloc(mem, bytes);
+    size_t oldSize = Malloc(malloc_usable_size)(mem);
+    void* newMem = Malloc(realloc)(mem, bytes);
     if (newMem) {
         // If this is larger than before, fill the extra with our pattern.
-        size_t newSize = dlmalloc_usable_size(newMem);
+        size_t newSize = Malloc(malloc_usable_size)(newMem);
         if (newSize > oldSize) {
             memset(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(newMem)+oldSize), CHK_FILL_FREE, newSize-oldSize);
         }
@@ -244,7 +243,7 @@
 }
 
 extern "C" void* fill_memalign(size_t alignment, size_t bytes) {
-    void* buffer = dlmemalign(alignment, bytes);
+    void* buffer = Malloc(memalign)(alignment, bytes);
     if (buffer) {
         memset(buffer, CHK_SENTINEL_VALUE, bytes);
     }
@@ -254,7 +253,7 @@
 extern "C" size_t fill_malloc_usable_size(const void* mem) {
     // Since we didn't allocate extra bytes before or after, we can
     // report the normal usable size here.
-    return dlmalloc_usable_size(mem);
+    return Malloc(malloc_usable_size)(mem);
 }
 
 // =============================================================================
@@ -275,7 +274,7 @@
         return NULL;
     }
 
-    void* base = dlmalloc(size);
+    void* base = Malloc(malloc)(size);
     if (base != NULL) {
         ScopedPthreadMutexLocker locker(&g_allocations_mutex);
 
@@ -316,11 +315,11 @@
             entry->allocations--;
             if (entry->allocations <= 0) {
                 remove_entry(entry);
-                dlfree(entry);
+                Malloc(free)(entry);
             }
 
             // now free the memory!
-            dlfree(header);
+            Malloc(free)(header);
         } else {
             debug_log("WARNING bad header guard: '0x%x'! and invalid entry: %p\n",
                     header->guard, header->entry);
@@ -425,7 +424,7 @@
             return 0;
         }
 
-        size_t ret = dlmalloc_usable_size(header);
+        size_t ret = Malloc(malloc_usable_size)(header);
         if (ret != 0) {
             // The usable area starts at 'mem' and stops at 'header+ret'.
             return reinterpret_cast<uintptr_t>(header) + ret - reinterpret_cast<uintptr_t>(mem);
diff --git a/libc/bionic/malloc_debug_qemu.cpp b/libc/bionic/malloc_debug_qemu.cpp
index 5a91daa..ac60c3b 100644
--- a/libc/bionic/malloc_debug_qemu.cpp
+++ b/libc/bionic/malloc_debug_qemu.cpp
@@ -50,7 +50,6 @@
 #include <pthread.h>
 #include <unistd.h>
 #include <errno.h>
-#include "dlmalloc.h"
 #include "private/libc_logging.h"
 #include "malloc_debug_common.h"
 
@@ -344,7 +343,7 @@
  * has been initialized. */
 static uint32_t malloc_pid = 0;
 
-/* Memory allocation alignment that is used in dlmalloc.
+/* Memory allocation alignment that is used in the malloc implementation.
  * This variable is updated by memcheck_initialize routine. */
 static uint32_t malloc_alignment = 8;
 
@@ -677,14 +676,14 @@
 extern "C" void* qemu_instrumented_malloc(size_t bytes) {
     MallocDesc desc;
 
-    /* Initialize block descriptor and allocate memory. Note that dlmalloc
+    /* Initialize block descriptor and allocate memory. Note that malloc
      * returns a valid pointer on zero allocation. Lets mimic this behavior. */
     desc.prefix_size = DEFAULT_PREFIX_SIZE;
     desc.requested_bytes = bytes;
     desc.suffix_size = DEFAULT_SUFFIX_SIZE;
-    desc.ptr = dlmalloc(mallocdesc_alloc_size(&desc));
+    desc.ptr = Malloc(malloc)(mallocdesc_alloc_size(&desc));
     if (desc.ptr == NULL) {
-        qemu_error_log("<libc_pid=%03u, pid=%03u> malloc(%zd): dlmalloc(%u) failed.",
+        qemu_error_log("<libc_pid=%03u, pid=%03u> malloc(%zd): malloc(%u) failed.",
                   malloc_pid, getpid(), bytes, mallocdesc_alloc_size(&desc));
         return NULL;
     }
@@ -693,7 +692,7 @@
     if (notify_qemu_malloc(&desc)) {
         log_mdesc(error, &desc, "<libc_pid=%03u, pid=%03u>: malloc: notify_malloc failed for ",
                   malloc_pid, getpid());
-        dlfree(desc.ptr);
+        Malloc(free)(desc.ptr);
         return NULL;
     } else {
 #if TEST_ACCESS_VIOLATIONS
@@ -714,7 +713,7 @@
 
     if (mem == NULL) {
         // Just let go NULL free
-        dlfree(mem);
+        Malloc(free)(mem);
         return;
     }
 
@@ -745,7 +744,7 @@
     } else {
         log_mdesc(info, &desc, "--- <libc_pid=%03u, pid=%03u> free(%p) -> ",
                   malloc_pid, getpid(), mem);
-        dlfree(desc.ptr);
+        Malloc(free)(desc.ptr);
     }
 }
 
@@ -795,9 +794,9 @@
         total_elements++;
         desc.suffix_size += (elem_size - total_size);
     }
-    desc.ptr = dlcalloc(total_elements, elem_size);
+    desc.ptr = Malloc(calloc)(total_elements, elem_size);
     if (desc.ptr == NULL) {
-        error_log("<libc_pid=%03u, pid=%03u> calloc: dlcalloc(%zd(%zd), %zd) (prx=%u, sfx=%u) failed.",
+        error_log("<libc_pid=%03u, pid=%03u> calloc: calloc(%zd(%zd), %zd) (prx=%u, sfx=%u) failed.",
                    malloc_pid, getpid(), n_elements, total_elements, elem_size,
                    desc.prefix_size, desc.suffix_size);
         return NULL;
@@ -806,7 +805,7 @@
     if (notify_qemu_malloc(&desc)) {
         log_mdesc(error, &desc, "<libc_pid=%03u, pid=%03u>: calloc(%zd(%zd), %zd): notify_malloc failed for ",
                   malloc_pid, getpid(), n_elements, total_elements, elem_size);
-        dlfree(desc.ptr);
+        Malloc(free)(desc.ptr);
         return NULL;
     } else {
 #if TEST_ACCESS_VIOLATIONS
@@ -843,7 +842,7 @@
                  malloc_pid, getpid(), mem, bytes);
         qemu_instrumented_free(mem);
 
-        // This is what dlrealloc does for a "free" realloc.
+        // This is what realloc does for a "free" realloc.
         return NULL;
     }
 
@@ -877,9 +876,9 @@
     new_desc.prefix_size = DEFAULT_PREFIX_SIZE;
     new_desc.requested_bytes = bytes;
     new_desc.suffix_size = DEFAULT_SUFFIX_SIZE;
-    new_desc.ptr = dlmalloc(mallocdesc_alloc_size(&new_desc));
+    new_desc.ptr = Malloc(malloc)(mallocdesc_alloc_size(&new_desc));
     if (new_desc.ptr == NULL) {
-        log_mdesc(error, &cur_desc, "<libc_pid=%03u, pid=%03u>: realloc(%p, %zd): dlmalloc(%u) failed on ",
+        log_mdesc(error, &cur_desc, "<libc_pid=%03u, pid=%03u>: realloc(%p, %zd): malloc(%u) failed on ",
                   malloc_pid, getpid(), mem, bytes,
                   mallocdesc_alloc_size(&new_desc));
         return NULL;
@@ -898,7 +897,7 @@
         log_mdesc(error, &new_desc, "<libc_pid=%03u, pid=%03u>: realloc(%p, %zd) notify_malloc failed -> ",
                   malloc_pid, getpid(), mem, bytes);
         log_mdesc(error, &cur_desc, "                                                                <- ");
-        dlfree(new_desc.ptr);
+        Malloc(free)(new_desc.ptr);
         return NULL;
     }
 
@@ -913,10 +912,10 @@
         /* Since we registered new decriptor with the emulator, we need
          * to unregister it before freeing newly allocated block. */
         notify_qemu_free(mallocdesc_user_ptr(&new_desc));
-        dlfree(new_desc.ptr);
+        Malloc(free)(new_desc.ptr);
         return NULL;
     }
-    dlfree(cur_desc.ptr);
+    Malloc(free)(cur_desc.ptr);
 
     log_mdesc(info, &new_desc, "=== <libc_pid=%03u, pid=%03u>: realloc(%p, %zd) -> ",
               malloc_pid, getpid(), mem, bytes);
@@ -946,9 +945,9 @@
                                                          DEFAULT_PREFIX_SIZE;
     desc.requested_bytes = bytes;
     desc.suffix_size = DEFAULT_SUFFIX_SIZE;
-    desc.ptr = dlmemalign(desc.prefix_size, mallocdesc_alloc_size(&desc));
+    desc.ptr = Malloc(memalign)(desc.prefix_size, mallocdesc_alloc_size(&desc));
     if (desc.ptr == NULL) {
-        error_log("<libc_pid=%03u, pid=%03u> memalign(%zx, %zd): dlmalloc(%u) failed.",
+        error_log("<libc_pid=%03u, pid=%03u> memalign(%zx, %zd): malloc(%u) failed.",
                   malloc_pid, getpid(), alignment, bytes,
                   mallocdesc_alloc_size(&desc));
         return NULL;
@@ -956,7 +955,7 @@
     if (notify_qemu_malloc(&desc)) {
         log_mdesc(error, &desc, "<libc_pid=%03u, pid=%03u>: memalign(%zx, %zd): notify_malloc failed for ",
                   malloc_pid, getpid(), alignment, bytes);
-        dlfree(desc.ptr);
+        Malloc(free)(desc.ptr);
         return NULL;
     }
 
diff --git a/libc/bionic/sysconf.cpp b/libc/bionic/sysconf.cpp
index 46874cc..8309f08 100644
--- a/libc/bionic/sysconf.cpp
+++ b/libc/bionic/sysconf.cpp
@@ -309,7 +309,7 @@
       return _POSIX_THREAD_DESTRUCTOR_ITERATIONS;
 
     case _SC_THREAD_KEYS_MAX:
-      return (BIONIC_TLS_SLOTS - TLS_SLOT_FIRST_USER_SLOT - GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT);
+      return (BIONIC_TLS_SLOTS - TLS_SLOT_FIRST_USER_SLOT - BIONIC_TLS_RESERVED_SLOTS);
 
     case _SC_THREAD_STACK_MIN:    return PTHREAD_STACK_MIN;
     case _SC_THREAD_THREADS_MAX:  return SYSTEM_THREAD_THREADS_MAX;
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index d0a0201..c2cf196 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -82,6 +82,13 @@
  */
 #define GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT 5
 
+#if defined(USE_JEMALLOC)
+/* jemalloc uses 5 keys for itself. */
+#define BIONIC_TLS_RESERVED_SLOTS (GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT + 5)
+#else
+#define BIONIC_TLS_RESERVED_SLOTS GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT
+#endif
+
 #define BIONIC_ALIGN(x, a) (((x) + (a - 1)) & ~(a - 1))
 
 /*
@@ -89,7 +96,7 @@
  * This includes space for pthread keys and our own internal slots.
  * We need to round up to maintain stack alignment.
  */
-#define BIONIC_TLS_SLOTS BIONIC_ALIGN(PTHREAD_KEYS_MAX + TLS_SLOT_FIRST_USER_SLOT + GLOBAL_INIT_THREAD_LOCAL_BUFFER_COUNT, 4)
+#define BIONIC_TLS_SLOTS BIONIC_ALIGN(PTHREAD_KEYS_MAX + TLS_SLOT_FIRST_USER_SLOT + BIONIC_TLS_RESERVED_SLOTS, 4)
 
 __END_DECLS
 
diff --git a/tests/Android.mk b/tests/Android.mk
index 51f10ca..37aeec3 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -40,6 +40,10 @@
 
 test_cflags += -D__STDC_LIMIT_MACROS  # For glibc.
 
+ifeq ($(MALLOC_IMPL),jemalloc)
+test_cflags += -DUSE_JEMALLOC
+endif
+
 test_cppflags = \
     -std=gnu++11 \
 
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
index 259853d..12a5ffa 100644
--- a/tests/malloc_test.cpp
+++ b/tests/malloc_test.cpp
@@ -46,7 +46,7 @@
   for (size_t i = 0; i <= 12; i++) {
     for (size_t alignment = 1 << i; alignment < (1U << (i+1)); alignment++) {
       char *ptr = (char*)memalign(alignment, 100);
-      ASSERT_TRUE(ptr != NULL);
+      ASSERT_TRUE(ptr != NULL) << alignment;
       ASSERT_LE(100U, malloc_usable_size(ptr));
       ASSERT_EQ(0, (intptr_t)ptr % (1 << i));
 
@@ -233,3 +233,18 @@
 
   free(ptr);
 }
+
+TEST(malloc, posix_memalign_non_power2) {
+  void* ptr;
+
+  ASSERT_EQ(EINVAL, posix_memalign(&ptr, 17, 1024));
+}
+
+TEST(malloc, memalign_non_power2) {
+  void* ptr;
+  for (size_t align = 0; align <= 256; align++) {
+    ptr = memalign(align, 1024);
+    ASSERT_TRUE(ptr != NULL) << "Failed at align " << align;
+    free(ptr);
+  }
+}