Implement malloc_usable_size for debug impls.

- Implemented chk_memalign.
- Fixed a few bugs in leak_memalign.
- Implemented {leak,fill,check,qemu}_malloc_usable_size.
- Dispatch malloc_usable_size through the malloc debug table at run time.
- Add malloc_test.cpp as a small set of tests for the
  malloc debug routines.
- Fix the qemu routines, which have been broken since the move to C++.
- Add support for the %u format to out_vformat in libc_logging.cpp.
  This is used by the emulator code.

Tested using the bionic-unit-tests with setprop libc.debug.malloc
set to 1, 5, and 10.

I tested as much as possible on the emulator, but tracing doesn't appear
to be working properly.

Bug: 6143477

Merge change from internal master.

(cherry-picked from commit 3d594c258045783fc9e1956ce7a4d91e302f011e)

Change-Id: I4ae00fffba82315a8c283f35893fd554460722fb
diff --git a/libc/bionic/libc_logging.cpp b/libc/bionic/libc_logging.cpp
index 8de1192..74e599c 100644
--- a/libc/bionic/libc_logging.cpp
+++ b/libc/bionic/libc_logging.cpp
@@ -348,7 +348,7 @@
             buffer[0] = '0';
             buffer[1] = 'x';
             format_integer(buffer + 2, sizeof(buffer) - 2, value, 'x');
-        } else if (c == 'd' || c == 'i' || c == 'o' || c == 'x' || c == 'X') {
+        } else if (c == 'd' || c == 'i' || c == 'o' || c == 'u' || c == 'x' || c == 'X') {
             /* integers - first read value from stack */
             uint64_t value;
             int is_signed = (c == 'd' || c == 'i' || c == 'o');
diff --git a/libc/bionic/malloc_debug_check.cpp b/libc/bionic/malloc_debug_check.cpp
index 91cf287..11a6ec1 100644
--- a/libc/bionic/malloc_debug_check.cpp
+++ b/libc/bionic/malloc_debug_check.cpp
@@ -74,6 +74,10 @@
 
 struct hdr_t {
     uint32_t tag;
+    void* base;  // Always points to the memory allocated using dlmalloc.
+                 // For memory allocated in chk_memalign, this value will
+                 // not be the same as the location of the start of this
+                 // structure.
     hdr_t* prev;
     hdr_t* next;
     uintptr_t bt[MAX_BACKTRACE_DEPTH];
@@ -82,7 +86,7 @@
     int freed_bt_depth;
     size_t size;
     char front_guard[FRONT_GUARD_LEN];
-} __attribute__((packed));
+} __attribute__((packed, aligned(MALLOC_ALIGNMENT)));
 
 struct ftr_t {
     char rear_guard[REAR_GUARD_LEN];
@@ -100,21 +104,26 @@
     return reinterpret_cast<hdr_t*>(user) - 1;
 }
 
+static inline const hdr_t* const_meta(const void* user) {
+    return reinterpret_cast<const hdr_t*>(user) - 1;
+}
+
+
 static unsigned gAllocatedBlockCount;
-static hdr_t *tail;
-static hdr_t *head;
+static hdr_t* tail;
+static hdr_t* head;
 static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
 
 static unsigned backlog_num;
-static hdr_t *backlog_tail;
-static hdr_t *backlog_head;
+static hdr_t* backlog_tail;
+static hdr_t* backlog_head;
 static pthread_mutex_t backlog_lock = PTHREAD_MUTEX_INITIALIZER;
 
-static inline void init_front_guard(hdr_t *hdr) {
+static inline void init_front_guard(hdr_t* hdr) {
     memset(hdr->front_guard, FRONT_GUARD, FRONT_GUARD_LEN);
 }
 
-static inline bool is_front_guard_valid(hdr_t *hdr) {
+static inline bool is_front_guard_valid(hdr_t* hdr) {
     for (size_t i = 0; i < FRONT_GUARD_LEN; i++) {
         if (hdr->front_guard[i] != FRONT_GUARD) {
             return 0;
@@ -123,12 +132,12 @@
     return 1;
 }
 
-static inline void init_rear_guard(hdr_t *hdr) {
+static inline void init_rear_guard(hdr_t* hdr) {
     ftr_t* ftr = to_ftr(hdr);
     memset(ftr->rear_guard, REAR_GUARD, REAR_GUARD_LEN);
 }
 
-static inline bool is_rear_guard_valid(hdr_t *hdr) {
+static inline bool is_rear_guard_valid(hdr_t* hdr) {
     unsigned i;
     int valid = 1;
     int first_mismatch = -1;
@@ -149,7 +158,7 @@
     return valid;
 }
 
-static inline void add_locked(hdr_t *hdr, hdr_t **tail, hdr_t **head) {
+static inline void add_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
     hdr->prev = NULL;
     hdr->next = *head;
     if (*head)
@@ -159,7 +168,7 @@
     *head = hdr;
 }
 
-static inline int del_locked(hdr_t *hdr, hdr_t **tail, hdr_t **head) {
+static inline int del_locked(hdr_t* hdr, hdr_t** tail, hdr_t** head) {
     if (hdr->prev) {
         hdr->prev->next = hdr->next;
     } else {
@@ -173,7 +182,7 @@
     return 0;
 }
 
-static inline void add(hdr_t *hdr, size_t size) {
+static inline void add(hdr_t* hdr, size_t size) {
     ScopedPthreadMutexLocker locker(&lock);
     hdr->tag = ALLOCATION_TAG;
     hdr->size = size;
@@ -183,7 +192,7 @@
     add_locked(hdr, &tail, &head);
 }
 
-static inline int del(hdr_t *hdr) {
+static inline int del(hdr_t* hdr) {
     if (hdr->tag != ALLOCATION_TAG) {
         return -1;
     }
@@ -194,13 +203,13 @@
     return 0;
 }
 
-static inline void poison(hdr_t *hdr) {
+static inline void poison(hdr_t* hdr) {
     memset(user(hdr), FREE_POISON, hdr->size);
 }
 
-static int was_used_after_free(hdr_t *hdr) {
+static int was_used_after_free(hdr_t* hdr) {
     unsigned i;
-    const char *data = (const char *)user(hdr);
+    const char* data = reinterpret_cast<const char *>(user(hdr));
     for (i = 0; i < hdr->size; i++)
         if (data[i] != FREE_POISON)
             return 1;
@@ -208,7 +217,7 @@
 }
 
 /* returns 1 if valid, *safe == 1 if safe to dump stack */
-static inline int check_guards(hdr_t *hdr, int *safe) {
+static inline int check_guards(hdr_t* hdr, int* safe) {
     *safe = 1;
     if (!is_front_guard_valid(hdr)) {
         if (hdr->front_guard[0] == FRONT_GUARD) {
@@ -233,7 +242,7 @@
 }
 
 /* returns 1 if valid, *safe == 1 if safe to dump stack */
-static inline int check_allocation_locked(hdr_t *hdr, int *safe) {
+static inline int check_allocation_locked(hdr_t* hdr, int* safe) {
     int valid = 1;
     *safe = 1;
 
@@ -270,9 +279,9 @@
     return valid;
 }
 
-static inline int del_and_check_locked(hdr_t *hdr,
-                                       hdr_t **tail, hdr_t **head, unsigned *cnt,
-                                       int *safe) {
+static inline int del_and_check_locked(hdr_t* hdr,
+                                       hdr_t** tail, hdr_t** head, unsigned* cnt,
+                                       int* safe) {
     int valid = check_allocation_locked(hdr, safe);
     if (safe) {
         (*cnt)--;
@@ -281,7 +290,7 @@
     return valid;
 }
 
-static inline void del_from_backlog_locked(hdr_t *hdr) {
+static inline void del_from_backlog_locked(hdr_t* hdr) {
     int safe;
     del_and_check_locked(hdr,
                          &backlog_tail, &backlog_head, &backlog_num,
@@ -289,17 +298,17 @@
     hdr->tag = 0; /* clear the tag */
 }
 
-static inline void del_from_backlog(hdr_t *hdr) {
+static inline void del_from_backlog(hdr_t* hdr) {
     ScopedPthreadMutexLocker locker(&backlog_lock);
     del_from_backlog_locked(hdr);
 }
 
-static inline int del_leak(hdr_t *hdr, int *safe) {
+static inline int del_leak(hdr_t* hdr, int* safe) {
     ScopedPthreadMutexLocker locker(&lock);
     return del_and_check_locked(hdr, &tail, &head, &gAllocatedBlockCount, safe);
 }
 
-static inline void add_to_backlog(hdr_t *hdr) {
+static inline void add_to_backlog(hdr_t* hdr) {
     ScopedPthreadMutexLocker locker(&backlog_lock);
     hdr->tag = BACKLOG_TAG;
     backlog_num++;
@@ -307,9 +316,9 @@
     poison(hdr);
     /* If we've exceeded the maximum backlog, clear it up */
     while (backlog_num > gMallocDebugBacklog) {
-        hdr_t *gone = backlog_tail;
+        hdr_t* gone = backlog_tail;
         del_from_backlog_locked(gone);
-        dlfree(gone);
+        dlfree(gone->base);
     }
 }
 
@@ -318,6 +327,7 @@
 
     hdr_t* hdr = static_cast<hdr_t*>(dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t)));
     if (hdr) {
+        hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
         add(hdr, size);
         return user(hdr);
@@ -325,13 +335,44 @@
     return NULL;
 }
 
-extern "C" void* chk_memalign(size_t, size_t bytes) {
-//  log_message("%s: %s\n", __FILE__, __FUNCTION__);
-    // XXX: it's better to use malloc, than being wrong
-    return chk_malloc(bytes);
+extern "C" void* chk_memalign(size_t alignment, size_t bytes) {
+    if (alignment <= MALLOC_ALIGNMENT) {
+        return chk_malloc(bytes);
+    }
+
+    // Make the alignment a power of two.
+    if (alignment & (alignment-1)) {
+        alignment = 1L << (31 - __builtin_clz(alignment));
+    }
+
+    // here, alignment is at least MALLOC_ALIGNMENT<<1 bytes
+    // we will align by at least MALLOC_ALIGNMENT bytes
+    // and at most alignment-MALLOC_ALIGNMENT bytes
+    size_t size = (alignment-MALLOC_ALIGNMENT) + bytes;
+    if (size < bytes) { // Overflow.
+        return NULL;
+    }
+
+    void* base = dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t));
+    if (base != NULL) {
+        // Check that the actual pointer that will be returned is aligned
+        // properly.
+        uintptr_t ptr = reinterpret_cast<uintptr_t>(user(reinterpret_cast<hdr_t*>(base)));
+        if ((ptr % alignment) != 0) {
+            // Align the pointer.
+            ptr += ((-ptr) % alignment);
+        }
+
+        hdr_t* hdr = meta(reinterpret_cast<void*>(ptr));
+        hdr->base = base;
+        hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
+        add(hdr, bytes);
+        return user(hdr);
+    }
+    return base;
 }
 
-extern "C" void chk_free(void *ptr) {
+extern "C" void chk_free(void* ptr) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
 
     if (!ptr) /* ignore free(NULL) */
@@ -366,7 +407,7 @@
     }
 }
 
-extern "C" void *chk_realloc(void *ptr, size_t size) {
+extern "C" void* chk_realloc(void* ptr, size_t size) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
 
     if (!ptr) {
@@ -414,8 +455,23 @@
         }
     }
 
-    hdr = static_cast<hdr_t*>(dlrealloc(hdr, sizeof(hdr_t) + size + sizeof(ftr_t)));
+    if (hdr->base != hdr) {
+        // An allocation from memalign, so create another allocation and
+        // copy the data out.
+        void* newMem = dlmalloc(sizeof(hdr_t) + size + sizeof(ftr_t));
+        if (newMem) {
+            memcpy(newMem, hdr, sizeof(hdr_t) + hdr->size);
+            dlfree(hdr->base);
+            hdr = static_cast<hdr_t*>(newMem);
+        } else {
+            dlfree(hdr->base);
+            hdr = NULL;
+        }
+    } else {
+        hdr = static_cast<hdr_t*>(dlrealloc(hdr, sizeof(hdr_t) + size + sizeof(ftr_t)));
+    }
     if (hdr) {
+        hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
         add(hdr, size);
         return user(hdr);
@@ -424,11 +480,12 @@
     return NULL;
 }
 
-extern "C" void *chk_calloc(int nmemb, size_t size) {
+extern "C" void* chk_calloc(int nmemb, size_t size) {
 //  log_message("%s: %s\n", __FILE__, __FUNCTION__);
     size_t total_size = nmemb * size;
     hdr_t* hdr = static_cast<hdr_t*>(dlcalloc(1, sizeof(hdr_t) + total_size + sizeof(ftr_t)));
     if (hdr) {
+        hdr->base = hdr;
         hdr->bt_depth = get_backtrace(hdr->bt, MAX_BACKTRACE_DEPTH);
         add(hdr, total_size);
         return user(hdr);
@@ -436,6 +493,18 @@
     return NULL;
 }
 
+extern "C" size_t chk_malloc_usable_size(const void* ptr) {
+    // dlmalloc_usable_size returns 0 for NULL and unknown blocks.
+    if (ptr == NULL)
+        return 0;
+
+    const hdr_t* hdr = const_meta(ptr);
+
+    // The sentinel tail is written just after the requested block bytes,
+    // so there is no extra room we can report here.
+    return hdr->size;
+}
+
 static void ReportMemoryLeaks() {
   // We only track leaks at level 10.
   if (gMallocDebugLevel != 10) {
diff --git a/libc/bionic/malloc_debug_common.cpp b/libc/bionic/malloc_debug_common.cpp
index 9cc84c3..ccceb14 100644
--- a/libc/bionic/malloc_debug_common.cpp
+++ b/libc/bionic/malloc_debug_common.cpp
@@ -190,10 +190,6 @@
     return dlmallinfo();
 }
 
-extern "C" size_t malloc_usable_size(const void* mem) {
-    return dlmalloc_usable_size(mem);
-}
-
 extern "C" void* valloc(size_t bytes) {
     return dlvalloc(bytes);
 }
@@ -215,8 +211,9 @@
 
 /* Table for dispatching malloc calls, initialized with default dispatchers. */
 extern const MallocDebug __libc_malloc_default_dispatch;
-const MallocDebug __libc_malloc_default_dispatch __attribute__((aligned(32))) = {
-    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign
+const MallocDebug __libc_malloc_default_dispatch __attribute__((aligned(32))) =
+{
+    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign, dlmalloc_usable_size,
 };
 
 /* Selector of dispatch table to use for dispatching malloc calls. */
@@ -242,6 +239,10 @@
     return __libc_malloc_dispatch->memalign(alignment, bytes);
 }
 
+extern "C" size_t malloc_usable_size(const void* mem) {
+    return __libc_malloc_dispatch->malloc_usable_size(mem);
+}
+
 /* We implement malloc debugging only in libc.so, so code below
  * must be excluded if we compile this file for static libc.a
  */
@@ -253,7 +254,7 @@
 
 /* Table for dispatching malloc calls, depending on environment. */
 static MallocDebug gMallocUse __attribute__((aligned(32))) = {
-    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign
+    dlmalloc, dlfree, dlcalloc, dlrealloc, dlmemalign, dlmalloc_usable_size
 };
 
 extern const char* __progname;
@@ -276,15 +277,10 @@
  * Actual functionality for debug levels 1-10 is implemented in
  * libc_malloc_debug_leak.so, while functionality for emultor's instrumented
  * allocations is implemented in libc_malloc_debug_qemu.so and can be run inside
-  * the emulator only.
+ * the emulator only.
  */
 static void* libc_malloc_impl_handle = NULL;
 
-// This must match the alignment used by dlmalloc.
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
-#endif
-
 /* This variable is set to the value of property libc.debug.malloc.backlog,
  * when the value of libc.debug.malloc = 10.  It determines the size of the
  * backlog we use to detect multiple frees.  If the property is not set, the
@@ -296,41 +292,26 @@
 /* The value of libc.debug.malloc. */
 int gMallocDebugLevel;
 
-static void InitMalloc(MallocDebug* table, const char* prefix) {
-  __libc_format_log(ANDROID_LOG_INFO, "libc", "%s: using libc.debug.malloc %d (%s)\n",
-                    __progname, gMallocDebugLevel, prefix);
+template<typename FunctionType>
+void InitMallocFunction(void* malloc_impl_handler, FunctionType* func, const char* prefix, const char* suffix) {
+    char symbol[128];
+    snprintf(symbol, sizeof(symbol), "%s_%s", prefix, suffix);
+    *func = reinterpret_cast<FunctionType>(dlsym(malloc_impl_handler, symbol));
+    if (*func == NULL) {
+        error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
+    }
+}
 
-  char symbol[128];
+static void InitMalloc(void* malloc_impl_handler, MallocDebug* table, const char* prefix) {
+    __libc_format_log(ANDROID_LOG_INFO, "libc", "%s: using libc.debug.malloc %d (%s)\n",
+                      __progname, gMallocDebugLevel, prefix);
 
-  snprintf(symbol, sizeof(symbol), "%s_malloc", prefix);
-  table->malloc = reinterpret_cast<MallocDebugMalloc>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->malloc == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_free", prefix);
-  table->free = reinterpret_cast<MallocDebugFree>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->free == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_calloc", prefix);
-  table->calloc = reinterpret_cast<MallocDebugCalloc>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->calloc == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_realloc", prefix);
-  table->realloc = reinterpret_cast<MallocDebugRealloc>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->realloc == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
-
-  snprintf(symbol, sizeof(symbol), "%s_memalign", prefix);
-  table->memalign = reinterpret_cast<MallocDebugMemalign>(dlsym(libc_malloc_impl_handle, symbol));
-  if (table->memalign == NULL) {
-      error_log("%s: dlsym(\"%s\") failed", __progname, symbol);
-  }
+    InitMallocFunction<MallocDebugMalloc>(malloc_impl_handler, &table->malloc, prefix, "malloc");
+    InitMallocFunction<MallocDebugFree>(malloc_impl_handler, &table->free, prefix, "free");
+    InitMallocFunction<MallocDebugCalloc>(malloc_impl_handler, &table->calloc, prefix, "calloc");
+    InitMallocFunction<MallocDebugRealloc>(malloc_impl_handler, &table->realloc, prefix, "realloc");
+    InitMallocFunction<MallocDebugMemalign>(malloc_impl_handler, &table->memalign, prefix, "memalign");
+    InitMallocFunction<MallocDebugMallocUsableSize>(malloc_impl_handler, &table->malloc_usable_size, prefix, "malloc_usable_size");
 }
 
 /* Initializes memory allocation framework once per process. */
@@ -422,24 +403,24 @@
     }
 
     // Load .so that implements the required malloc debugging functionality.
-    libc_malloc_impl_handle = dlopen(so_name, RTLD_LAZY);
-    if (libc_malloc_impl_handle == NULL) {
+    void* malloc_impl_handle = dlopen(so_name, RTLD_LAZY);
+    if (malloc_impl_handle == NULL) {
         error_log("%s: Missing module %s required for malloc debug level %d: %s",
                   __progname, so_name, gMallocDebugLevel, dlerror());
         return;
     }
 
     // Initialize malloc debugging in the loaded module.
-    malloc_debug_initialize = reinterpret_cast<MallocDebugInit>(dlsym(libc_malloc_impl_handle,
+    malloc_debug_initialize = reinterpret_cast<MallocDebugInit>(dlsym(malloc_impl_handle,
                                                                       "malloc_debug_initialize"));
     if (malloc_debug_initialize == NULL) {
         error_log("%s: Initialization routine is not found in %s\n",
                   __progname, so_name);
-        dlclose(libc_malloc_impl_handle);
+        dlclose(malloc_impl_handle);
         return;
     }
     if (malloc_debug_initialize() == -1) {
-        dlclose(libc_malloc_impl_handle);
+        dlclose(malloc_impl_handle);
         return;
     }
 
@@ -447,34 +428,35 @@
         // For memory checker we need to do extra initialization.
         typedef int (*MemCheckInit)(int, const char*);
         MemCheckInit memcheck_initialize =
-            reinterpret_cast<MemCheckInit>(dlsym(libc_malloc_impl_handle,
+            reinterpret_cast<MemCheckInit>(dlsym(malloc_impl_handle,
                                                  "memcheck_initialize"));
         if (memcheck_initialize == NULL) {
             error_log("%s: memcheck_initialize routine is not found in %s\n",
                       __progname, so_name);
-            dlclose(libc_malloc_impl_handle);
+            dlclose(malloc_impl_handle);
             return;
         }
 
         if (memcheck_initialize(MALLOC_ALIGNMENT, memcheck_tracing)) {
-            dlclose(libc_malloc_impl_handle);
+            dlclose(malloc_impl_handle);
             return;
         }
     }
 
+
     // Initialize malloc dispatch table with appropriate routines.
     switch (gMallocDebugLevel) {
         case 1:
-            InitMalloc(&gMallocUse, "leak");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "leak");
             break;
         case 5:
-            InitMalloc(&gMallocUse, "fill");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "fill");
             break;
         case 10:
-            InitMalloc(&gMallocUse, "chk");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "chk");
             break;
         case 20:
-            InitMalloc(&gMallocUse, "qemu_instrumented");
+            InitMalloc(malloc_impl_handle, &gMallocUse, "qemu_instrumented");
             break;
         default:
             break;
@@ -485,13 +467,14 @@
         (gMallocUse.free == NULL) ||
         (gMallocUse.calloc == NULL) ||
         (gMallocUse.realloc == NULL) ||
-        (gMallocUse.memalign == NULL)) {
+        (gMallocUse.memalign == NULL) ||
+        (gMallocUse.malloc_usable_size == NULL)) {
         error_log("%s: some symbols for libc.debug.malloc level %d were not found (see above)",
                   __progname, gMallocDebugLevel);
-        dlclose(libc_malloc_impl_handle);
-        libc_malloc_impl_handle = NULL;
+        dlclose(malloc_impl_handle);
     } else {
         __libc_malloc_dispatch = &gMallocUse;
+        libc_malloc_impl_handle = malloc_impl_handle;
     }
 }
 
diff --git a/libc/bionic/malloc_debug_common.h b/libc/bionic/malloc_debug_common.h
index 12d0e65..a3f9909 100644
--- a/libc/bionic/malloc_debug_common.h
+++ b/libc/bionic/malloc_debug_common.h
@@ -45,6 +45,11 @@
 
 #define MAX_SIZE_T           (~(size_t)0)
 
+// This must match the alignment used by dlmalloc.
+#ifndef MALLOC_ALIGNMENT
+#define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
+#endif
+
 // =============================================================================
 // Structures
 // =============================================================================
@@ -71,12 +76,14 @@
 typedef void* (*MallocDebugCalloc)(size_t, size_t);
 typedef void* (*MallocDebugRealloc)(void*, size_t);
 typedef void* (*MallocDebugMemalign)(size_t, size_t);
+typedef size_t (*MallocDebugMallocUsableSize)(const void*);
 struct MallocDebug {
   MallocDebugMalloc malloc;
   MallocDebugFree free;
   MallocDebugCalloc calloc;
   MallocDebugRealloc realloc;
   MallocDebugMemalign memalign;
+  MallocDebugMallocUsableSize malloc_usable_size;
 };
 
 /* Malloc debugging initialization and finalization routines.
diff --git a/libc/bionic/malloc_debug_leak.cpp b/libc/bionic/malloc_debug_leak.cpp
index 2db8a1f..45b45c2 100644
--- a/libc/bionic/malloc_debug_leak.cpp
+++ b/libc/bionic/malloc_debug_leak.cpp
@@ -67,9 +67,6 @@
 // stack trace functions
 // =============================================================================
 
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT    ((size_t)8U)
-#endif
 #define GUARD               0x48151642
 #define DEBUG               0
 
@@ -80,12 +77,16 @@
 struct AllocationEntry {
     HashEntry* entry;
     uint32_t guard;
-};
+} __attribute__((aligned(MALLOC_ALIGNMENT)));
 
-static AllocationEntry* to_header(void* mem) {
+static inline AllocationEntry* to_header(void* mem) {
   return reinterpret_cast<AllocationEntry*>(mem) - 1;
 }
 
+static inline const AllocationEntry* const_to_header(const void* mem) {
+  return reinterpret_cast<const AllocationEntry*>(mem) - 1;
+}
+
 // =============================================================================
 // Hash Table functions
 // =============================================================================
@@ -229,17 +230,16 @@
 }
 
 extern "C" void* fill_realloc(void* mem, size_t bytes) {
-    void* buffer = fill_malloc(bytes);
-    if (mem == NULL) {
-        return buffer;
+    size_t oldSize = dlmalloc_usable_size(mem);
+    void* newMem = dlrealloc(mem, bytes);
+    if (newMem) {
+        // If this is larger than before, fill the extra with our pattern.
+        size_t newSize = dlmalloc_usable_size(newMem);
+        if (newSize > oldSize) {
+            memset(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(newMem)+oldSize), CHK_FILL_FREE, newSize-oldSize);
+        }
     }
-    if (buffer) {
-        size_t old_size = dlmalloc_usable_size(mem);
-        size_t size = (bytes < old_size)?(bytes):(old_size);
-        memcpy(buffer, mem, size);
-        fill_free(mem);
-    }
-    return buffer;
+    return newMem;
 }
 
 extern "C" void* fill_memalign(size_t alignment, size_t bytes) {
@@ -250,11 +250,17 @@
     return buffer;
 }
 
+extern "C" size_t fill_malloc_usable_size(const void* mem) {
+    // Since we didn't allocate extra bytes before or after, we can
+    // report the normal usable size here.
+    return dlmalloc_usable_size(mem);
+}
+
 // =============================================================================
 // malloc leak functions
 // =============================================================================
 
-static void* MEMALIGN_GUARD = reinterpret_cast<void*>(0xA1A41520);
+static uint32_t MEMALIGN_GUARD      = 0xA1A41520;
 
 extern "C" void* leak_malloc(size_t bytes) {
     // allocate enough space infront of the allocation to store the pointer for
@@ -296,9 +302,10 @@
 
         if (header->guard != GUARD) {
             // could be a memaligned block
-            if (reinterpret_cast<void**>(mem)[-1] == MEMALIGN_GUARD) {
-                mem = reinterpret_cast<void**>(mem)[-2];
-                header = to_header(mem);
+            if (header->guard == MEMALIGN_GUARD) {
+                // For memaligned blocks, header->entry points to the memory
+                // allocated through leak_malloc.
+                header = to_header(header->entry);
             }
         }
 
@@ -338,19 +345,26 @@
     if (oldMem == NULL) {
         return leak_malloc(bytes);
     }
+
     void* newMem = NULL;
     AllocationEntry* header = to_header(oldMem);
-    if (header && header->guard == GUARD) {
-        size_t oldSize = header->entry->size & ~SIZE_FLAG_MASK;
-        newMem = leak_malloc(bytes);
-        if (newMem != NULL) {
-            size_t copySize = (oldSize <= bytes) ? oldSize : bytes;
-            memcpy(newMem, oldMem, copySize);
-            leak_free(oldMem);
-        }
-    } else {
-        newMem = dlrealloc(oldMem, bytes);
+    if (header->guard == MEMALIGN_GUARD) {
+        // Get the real header.
+        header = to_header(header->entry);
+    } else if (header->guard != GUARD) {
+        debug_log("WARNING bad header guard: '0x%x'! and invalid entry: %p\n",
+                   header->guard, header->entry);
+        return NULL;
     }
+
+    newMem = leak_malloc(bytes);
+    if (newMem != NULL) {
+        size_t oldSize = header->entry->size & ~SIZE_FLAG_MASK;
+        size_t copySize = (oldSize <= bytes) ? oldSize : bytes;
+        memcpy(newMem, oldMem, copySize);
+    }
+    leak_free(oldMem);
+
     return newMem;
 }
 
@@ -375,7 +389,7 @@
 
     void* base = leak_malloc(size);
     if (base != NULL) {
-        intptr_t ptr = reinterpret_cast<intptr_t>(base);
+        uintptr_t ptr = reinterpret_cast<uintptr_t>(base);
         if ((ptr % alignment) == 0) {
             return base;
         }
@@ -383,11 +397,38 @@
         // align the pointer
         ptr += ((-ptr) % alignment);
 
-        // there is always enough space for the base pointer and the guard
-        reinterpret_cast<void**>(ptr)[-1] = MEMALIGN_GUARD;
-        reinterpret_cast<void**>(ptr)[-2] = base;
+        // Enough space has already been allocated for the header. This
+        // assumes that the malloc alignment is at least 8; otherwise,
+        // there is no guarantee of enough space for the header.
+        AllocationEntry* header = to_header(reinterpret_cast<void*>(ptr));
+        header->guard = MEMALIGN_GUARD;
+        header->entry = reinterpret_cast<HashEntry*>(base);
 
         return reinterpret_cast<void*>(ptr);
     }
     return base;
 }
+
+extern "C" size_t leak_malloc_usable_size(const void* mem) {
+    if (mem != NULL) {
+        // Check the guard to make sure it is valid.
+        const AllocationEntry* header = const_to_header((void*)mem);
+
+        if (header->guard == MEMALIGN_GUARD) {
+            // If this is a memalign'd pointer, then grab the header from
+            // entry.
+            header = const_to_header(header->entry);
+        } else if (header->guard != GUARD) {
+            debug_log("WARNING bad header guard: '0x%x'! and invalid entry: %p\n",
+                      header->guard, header->entry);
+            return 0;
+        }
+
+        size_t ret = dlmalloc_usable_size(header);
+        if (ret != 0) {
+            // The usable area starts at 'mem' and stops at 'header+ret'.
+            return reinterpret_cast<uintptr_t>(header) + ret - reinterpret_cast<uintptr_t>(mem);
+        }
+    }
+    return 0;
+}
diff --git a/libc/bionic/malloc_debug_qemu.cpp b/libc/bionic/malloc_debug_qemu.cpp
index 34ddb87..4c666a9 100644
--- a/libc/bionic/malloc_debug_qemu.cpp
+++ b/libc/bionic/malloc_debug_qemu.cpp
@@ -137,7 +137,7 @@
      * will respond with information about allocated block that contains this
      * pointer.
      */
-    void*       ptr;
+    const void*       ptr;
 
     /* Id of the process that initialized libc instance, in which this query
      * is called. This field is used by the emulator to report errors in
@@ -469,7 +469,7 @@
  * Return:
  *  Zero on success, or -1 on failure.
  */
-static inline int query_qemu_malloc_info(void* ptr, MallocDesc* desc, uint32_t routine) {
+static inline int query_qemu_malloc_info(const void* ptr, MallocDesc* desc, uint32_t routine) {
     volatile MallocDescQuery query;
 
     query.ptr = ptr;
@@ -574,11 +574,12 @@
 // API routines
 // =============================================================================
 
-void* qemu_instrumented_malloc(size_t bytes);
-void  qemu_instrumented_free(void* mem);
-void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size);
-void* qemu_instrumented_realloc(void* mem, size_t bytes);
-void* qemu_instrumented_memalign(size_t alignment, size_t bytes);
+extern "C" void* qemu_instrumented_malloc(size_t bytes);
+extern "C" void  qemu_instrumented_free(void* mem);
+extern "C" void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size);
+extern "C" void* qemu_instrumented_realloc(void* mem, size_t bytes);
+extern "C" void* qemu_instrumented_memalign(size_t alignment, size_t bytes);
+extern "C" size_t qemu_instrumented_malloc_usable_size(const void* mem);
 
 /* Initializes malloc debugging instrumentation for the emulator.
  * This routine is called from malloc_init_impl routine implemented in
@@ -589,7 +590,7 @@
  * Return:
  *  0 on success, or -1 on failure.
 */
-int malloc_debug_initialize() {
+extern "C" int malloc_debug_initialize() {
     /* We will be using emulator's magic page to report memory allocation
      * activities. In essence, what magic page does, it translates writes to
      * the memory mapped spaces into writes to an I/O port that emulator
@@ -627,7 +628,7 @@
  * Return:
  *  0 on success, or -1 on failure.
 */
-int memcheck_initialize(int alignment, const char* memcheck_param) {
+extern "C" int memcheck_initialize(int alignment, const char* memcheck_param) {
     malloc_alignment = alignment;
 
     /* Parse -memcheck parameter for the guest tracing flags. */
@@ -673,7 +674,7 @@
  * bytes (plus prefix, and suffix guards), and report allocation to the
  * emulator.
  */
-void* qemu_instrumented_malloc(size_t bytes) {
+extern "C" void* qemu_instrumented_malloc(size_t bytes) {
     MallocDesc desc;
 
     /* Initialize block descriptor and allocate memory. Note that dlmalloc
@@ -708,7 +709,7 @@
  * Primary responsibility of this routine is to free requested memory, and
  * report free block to the emulator.
  */
-void qemu_instrumented_free(void* mem) {
+extern "C" void qemu_instrumented_free(void* mem) {
     MallocDesc desc;
 
     if (mem == NULL) {
@@ -751,7 +752,7 @@
 /* This routine serves as entry point for 'calloc'.
  * This routine behaves similarly to qemu_instrumented_malloc.
  */
-void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size) {
+extern "C" void* qemu_instrumented_calloc(size_t n_elements, size_t elem_size) {
     if (n_elements == 0 || elem_size == 0) {
         // Just let go zero bytes allocation.
         qemu_info_log("::: <libc_pid=%03u, pid=%03u>: Zero calloc redir to malloc",
@@ -823,7 +824,7 @@
  * allocation, but overall it doesn't seem to matter, as caller of realloc
  * should not expect that pointer returned after shrinking will remain the same.
  */
-void* qemu_instrumented_realloc(void* mem, size_t bytes) {
+extern "C" void* qemu_instrumented_realloc(void* mem, size_t bytes) {
     MallocDesc new_desc;
     MallocDesc cur_desc;
     size_t to_copy;
@@ -927,7 +928,7 @@
 /* This routine serves as entry point for 'memalign'.
  * This routine behaves similarly to qemu_instrumented_malloc.
  */
-void* qemu_instrumented_memalign(size_t alignment, size_t bytes) {
+extern "C" void* qemu_instrumented_memalign(size_t alignment, size_t bytes) {
     MallocDesc desc;
 
     if (bytes == 0) {
@@ -967,3 +968,27 @@
               malloc_pid, getpid(), alignment, bytes);
     return mallocdesc_user_ptr(&desc);
 }
+
+/* This routine serves as entry point for 'malloc_usable_size'. It returns the
+ * requested size of the block at 'mem', or 0 if 'mem' is NULL or fails the checks. */
+extern "C" size_t qemu_instrumented_malloc_usable_size(const void* mem) {
+    MallocDesc cur_desc;
+
+    if (mem == NULL) {
+        return 0;  // Nothing to look up for NULL; skip the emulator query.
+    }
+    // Query emulator for the block information.
+    if (query_qemu_malloc_info(mem, &cur_desc, 2)) {
+        // Note that this violation should be already caught in the emulator.
+        error_log("<libc_pid=%03u, pid=%03u>: malloc_usable_size(%p) query_info failed.",
+                  malloc_pid, getpid(), mem);
+        return 0;
+    }
+    // 'mem' must be the block's user pointer (should already be caught in the emulator).
+    if (mem != mallocdesc_user_ptr(&cur_desc)) {
+        log_mdesc(error, &cur_desc, "<libc_pid=%03u, pid=%03u>: malloc_usable_size(%p) is invalid for ",
+                  malloc_pid, getpid(), mem);
+        return 0;
+    }
+    return cur_desc.requested_bytes;  // Can't report more than was requested.
+}
diff --git a/tests/Android.mk b/tests/Android.mk
index 45cb462..875746d 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -66,6 +66,7 @@
     getcwd_test.cpp \
     libc_logging_test.cpp \
     libgen_test.cpp \
+    malloc_test.cpp \
     math_test.cpp \
     netdb_test.cpp \
     pthread_test.cpp \
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
new file mode 100644
index 0000000..259853d
--- /dev/null
+++ b/tests/malloc_test.cpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <stdlib.h>
+#include <malloc.h>
+
+TEST(malloc, malloc_std) {
+  // A plain malloc must succeed and report at least the requested size.
+  void* block = malloc(100);
+  ASSERT_TRUE(block != NULL);
+  const size_t usable = malloc_usable_size(block);
+  ASSERT_LE(100U, usable);
+  free(block);
+}
+
+TEST(malloc, calloc_std) {
+  // calloc must hand back zero-filled memory of at least the requested size.
+  const size_t kLen = 100;
+  char* zeroed = static_cast<char*>(calloc(1, kLen));
+  ASSERT_TRUE(zeroed != NULL);
+  ASSERT_LE(kLen, malloc_usable_size(zeroed));
+  for (const char* p = zeroed; p != zeroed + kLen; ++p) {
+    ASSERT_EQ(0, *p);
+  }
+
+  free(zeroed);
+}
+
+TEST(malloc, memalign_multiple) {
+  // Every alignment in [2^k, 2^(k+1)) must yield a block aligned to at least 2^k.
+  for (size_t shift = 0; shift <= 12; shift++) {
+    const int pow2 = 1 << shift;
+    for (size_t alignment = pow2; alignment < (1U << (shift+1)); alignment++) {
+      char* block = static_cast<char*>(memalign(alignment, 100));
+      ASSERT_TRUE(block != NULL);
+      ASSERT_LE(100U, malloc_usable_size(block));
+      ASSERT_EQ(0, reinterpret_cast<intptr_t>(block) % pow2);
+      free(block);
+    }
+  }
+}
+
+TEST(malloc, memalign_realloc) {
+  // Memalign and then realloc the pointer a couple of times (grow, grow, shrink),
+  // checking after each realloc that the bytes written before it are preserved.
+  for (size_t alignment = 1; alignment <= 4096; alignment <<= 1) {
+    char *ptr = (char*)memalign(alignment, 100);
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(100U, malloc_usable_size(ptr));
+    ASSERT_EQ(0U, (intptr_t)ptr % alignment);
+    memset(ptr, 0x23, 100);
+
+    ptr = (char*)realloc(ptr, 200);
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(200U, malloc_usable_size(ptr));
+    for (size_t i = 0; i < 100; i++) {
+      ASSERT_EQ(0x23, ptr[i]);
+    }
+    memset(ptr, 0x45, 200);  // Fresh pattern so the next check cannot match old data.
+
+    ptr = (char*)realloc(ptr, 300);
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(300U, malloc_usable_size(ptr));
+    for (size_t i = 0; i < 200; i++) {
+      ASSERT_EQ(0x45, ptr[i]);
+    }
+    memset(ptr, 0x67, 300);
+
+    ptr = (char*)realloc(ptr, 250);
+    ASSERT_TRUE(ptr != NULL);
+    ASSERT_LE(250U, malloc_usable_size(ptr));
+    for (size_t i = 0; i < 250; i++) {  // Only the first 250 bytes are requested now.
+      ASSERT_EQ(0x67, ptr[i]);
+    }
+
+    free(ptr);
+  }
+}
+
+TEST(malloc, malloc_realloc_larger) {
+  // Grow a malloc'ed block with realloc; the original 100 bytes must survive.
+  char* block = static_cast<char*>(malloc(100));
+  ASSERT_TRUE(block != NULL);
+  ASSERT_LE(100U, malloc_usable_size(block));
+  memset(block, 67, 100);
+
+  block = static_cast<char*>(realloc(block, 200));
+  ASSERT_TRUE(block != NULL);
+  ASSERT_LE(200U, malloc_usable_size(block));
+  for (const char* p = block; p != block + 100; ++p) {
+    ASSERT_EQ(67, *p);
+  }
+
+  free(block);
+}
+
+TEST(malloc, malloc_realloc_smaller) {
+  // Shrink a malloc'ed block with realloc; the first 100 bytes must be kept.
+  char* block = static_cast<char*>(malloc(200));
+  ASSERT_TRUE(block != NULL);
+  ASSERT_LE(200U, malloc_usable_size(block));
+  memset(block, 67, 200);
+
+  block = static_cast<char*>(realloc(block, 100));
+  ASSERT_TRUE(block != NULL);
+  ASSERT_LE(100U, malloc_usable_size(block));
+  for (size_t offset = 0; offset < 100; ++offset) {
+    ASSERT_EQ(67, block[offset]);
+  }
+
+  free(block);
+}
+
+TEST(malloc, malloc_multiple_realloc) {
+  // Shrink twice, then grow twice; the 0x23 fill must survive every realloc.
+  char* buf = static_cast<char*>(malloc(200));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(200U, malloc_usable_size(buf));
+  memset(buf, 0x23, 200);
+
+  buf = static_cast<char*>(realloc(buf, 100));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(100U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 100; ++n) {
+    ASSERT_EQ(0x23, buf[n]);
+  }
+
+  buf = static_cast<char*>(realloc(buf, 50));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(50U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 50; ++n) {
+    ASSERT_EQ(0x23, buf[n]);
+  }
+
+  buf = static_cast<char*>(realloc(buf, 150));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(150U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 50; ++n) {
+    ASSERT_EQ(0x23, buf[n]);
+  }
+  memset(buf, 0x23, 150);  // Refill so the next check can cover all 150 bytes.
+
+  buf = static_cast<char*>(realloc(buf, 425));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(425U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 150; ++n) {
+    ASSERT_EQ(0x23, buf[n]);
+  }
+
+  free(buf);
+}
+TEST(malloc, calloc_realloc_larger) {
+  // Grow a calloc'ed block with realloc; the original 100 bytes must stay zero.
+  char* zeroed = static_cast<char*>(calloc(1, 100));
+  ASSERT_TRUE(zeroed != NULL);
+  ASSERT_LE(100U, malloc_usable_size(zeroed));
+
+  zeroed = static_cast<char*>(realloc(zeroed, 200));
+  ASSERT_TRUE(zeroed != NULL);
+  ASSERT_LE(200U, malloc_usable_size(zeroed));
+  for (const char* p = zeroed; p != zeroed + 100; ++p) {
+    ASSERT_EQ(0, *p);
+  }
+
+  free(zeroed);
+}
+
+TEST(malloc, calloc_realloc_smaller) {
+  // Shrink a calloc'ed block with realloc; the first 100 bytes must stay zero.
+  char* zeroed = static_cast<char*>(calloc(1, 200));
+  ASSERT_TRUE(zeroed != NULL);
+  ASSERT_LE(200U, malloc_usable_size(zeroed));
+
+  zeroed = static_cast<char*>(realloc(zeroed, 100));
+  ASSERT_TRUE(zeroed != NULL);
+  ASSERT_LE(100U, malloc_usable_size(zeroed));
+  for (size_t offset = 0; offset < 100; ++offset) {
+    ASSERT_EQ(0, zeroed[offset]);
+  }
+
+  free(zeroed);
+}
+
+TEST(malloc, calloc_multiple_realloc) {
+  // Shrink twice, then grow twice; the checked prefix must read as zero each time.
+  char* buf = static_cast<char*>(calloc(1, 200));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(200U, malloc_usable_size(buf));
+
+  buf = static_cast<char*>(realloc(buf, 100));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(100U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 100; ++n) {
+    ASSERT_EQ(0, buf[n]);
+  }
+
+  buf = static_cast<char*>(realloc(buf, 50));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(50U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 50; ++n) {
+    ASSERT_EQ(0, buf[n]);
+  }
+
+  buf = static_cast<char*>(realloc(buf, 150));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(150U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 50; ++n) {
+    ASSERT_EQ(0, buf[n]);
+  }
+  memset(buf, 0, 150);  // Zero all 150 bytes so the next check can cover them.
+
+  buf = static_cast<char*>(realloc(buf, 425));
+  ASSERT_TRUE(buf != NULL);
+  ASSERT_LE(425U, malloc_usable_size(buf));
+  for (size_t n = 0; n < 150; ++n) {
+    ASSERT_EQ(0, buf[n]);
+  }
+
+  free(buf);
+}