imprecise mark-and-sweep native memory leak detector

libmemunreachable uses an imprecise mark-and-sweep pass over all memory
allocated by jemalloc to find unreachable (leaked) allocations.

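The allocator added here (Allocator.cpp) backs the detector's own
bookkeeping with raw mmap so that it stays out of the jemalloc heap being
scanned. A rough usage sketch, assuming the STL-style Allocator<T> wrapper
over Heap declared in this patch's Allocator.h:

    Heap heap;
    Allocator<int> allocator(heap);
    std::vector<int, Allocator<int>> roots(allocator);
    roots.push_back(42);  // backing memory comes from mmap, not jemalloc
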
Change-Id: Ia70bbf31f5b40ff71dab28cfd6cd06c5ef01a2d4
diff --git a/libmemunreachable/Allocator.cpp b/libmemunreachable/Allocator.cpp
new file mode 100644
index 0000000..b75f1e5
--- /dev/null
+++ b/libmemunreachable/Allocator.cpp
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Header page:
+//
+// The first page of each chunk holds the Chunk header, including a free
+// bitmap. At the minimum allocation size (8 bytes) the bitmap must track
+// every allocation in the 256KiB chunk minus its header page.
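+//
+// Worked out with the constants defined below:
+//   usable bytes per chunk: 256KiB - 4KiB header = 258048
+//   allocations to track:   258048 / 8 = 32256 -> 32256 bits
+//   bitmap size:            32256 / 8 = 4032 bytes
+//   coverage check:         4032 * 8 * 8 = 258048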
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/cdefs.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <atomic>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+
+#include "android-base/macros.h"
+
+#include "anon_vma_naming.h"
+#include "Allocator.h"
+#include "LinkedList.h"
+
+// runtime interfaces used:
+// abort
+// assert - fprintf + abort
+// mmap
+// munmap
+// prctl
+
+constexpr size_t const_log2(size_t n, size_t p = 0) {
+  return (n <= 1) ? p : const_log2(n / 2, p + 1);
+}
+
+constexpr unsigned int div_round_up(unsigned int x, unsigned int y) {
+  return (x + y - 1) / y;
+}
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+static constexpr size_t kPageSize = 4096;
+static constexpr size_t kChunkSize = 256 * 1024;
+static constexpr size_t kUsableChunkSize = kChunkSize - kPageSize;
+static constexpr size_t kMaxBucketAllocationSize = kChunkSize / 4;
+static constexpr size_t kMinBucketAllocationSize = 8;
+static constexpr unsigned int kNumBuckets = const_log2(kMaxBucketAllocationSize)
+    - const_log2(kMinBucketAllocationSize) + 1;
+static constexpr unsigned int kUsablePagesPerChunk = kUsableChunkSize
+    / kPageSize;
+
+std::atomic<int> heap_count;
+
+class Chunk;
+
+class HeapImpl {
+ public:
+  HeapImpl();
+  ~HeapImpl();
+  void* operator new(std::size_t count) noexcept;
+  void operator delete(void* ptr);
+
+  void* Alloc(size_t size);
+  void Free(void* ptr);
+  bool Empty();
+
+  void MoveToFullList(Chunk* chunk, int bucket_);
+  void MoveToFreeList(Chunk* chunk, int bucket_);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HeapImpl);
+
+  LinkedList<Chunk*> free_chunks_[kNumBuckets];
+  LinkedList<Chunk*> full_chunks_[kNumBuckets];
+
+  void MoveToList(Chunk* chunk, LinkedList<Chunk*>* head);
+  void* MapAlloc(size_t size);
+  void MapFree(void* ptr);
+  void* AllocLocked(size_t size);
+  void FreeLocked(void* ptr);
+
+  struct MapAllocation {
+    void *ptr;
+    size_t size;
+    MapAllocation* next;
+  };
+  MapAllocation* map_allocation_list_;
+  std::mutex m_;
+};
+
+// Integer log 2, rounds down
+static inline unsigned int log2(size_t n) {
+  return 8 * sizeof(unsigned long long) - __builtin_clzll(n) - 1;
+}
+
+static inline unsigned int size_to_bucket(size_t size) {
+  if (size < kMinBucketAllocationSize)
+    return 0;  // sizes below the minimum use the smallest (8-byte) bucket
+  return log2(size - 1) + 1 - const_log2(kMinBucketAllocationSize);
+}
+
+static inline size_t bucket_to_size(unsigned int bucket) {
+  return kMinBucketAllocationSize << bucket;
+}
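+
+// Examples of the size class mapping (kMinBucketAllocationSize == 8):
+//   size_to_bucket(1..8)  == 0,  bucket_to_size(0)  == 8
+//   size_to_bucket(9..16) == 1,  bucket_to_size(1)  == 16
+//   size_to_bucket(65536) == 13, bucket_to_size(13) == 65536
+//                                (== kMaxBucketAllocationSize)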
+
+static void* MapAligned(size_t size, size_t align) {
+  const int prot = PROT_READ | PROT_WRITE;
+  const int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+
+  size = (size + kPageSize - 1) & ~(kPageSize - 1);
+
+  // Over-allocate enough to align
+  size_t map_size = size + align - kPageSize;
+  if (map_size < size) {
+    return nullptr;
+  }
+
+  void* ptr = mmap(NULL, map_size, prot, flags, -1, 0);
+  if (ptr == MAP_FAILED) {
+    return nullptr;
+  }
+
+  size_t aligned_size = map_size;
+  void* aligned_ptr = ptr;
+
+  std::align(align, size, aligned_ptr, aligned_size);
+
+  // Trim beginning
+  if (aligned_ptr != ptr) {
+    ptrdiff_t extra = reinterpret_cast<uintptr_t>(aligned_ptr)
+        - reinterpret_cast<uintptr_t>(ptr);
+    munmap(ptr, extra);
+    map_size -= extra;
+    ptr = aligned_ptr;
+  }
+
+  // Trim end
+  if (map_size != size) {
+    assert(map_size > size);
+    assert(ptr != NULL);
+    munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ptr) + size),
+        map_size - size);
+  }
+
+  // PR_SET_VMA and PR_SET_VMA_ANON_NAME are defined in anon_vma_naming.h;
+  // the name below shows up in /proc/<pid>/maps for these mappings.
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
+      reinterpret_cast<uintptr_t>(ptr), size, "leak_detector_malloc");
+
+  return ptr;
+}
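+
+// Example of the over-allocate-and-trim scheme in MapAligned(): for
+// MapAligned(kChunkSize, kChunkSize), mmap reserves 508KiB
+// (256KiB + 256KiB - 4KiB), std::align locates the first 256KiB boundary,
+// and the unaligned head and tail are munmapped, leaving exactly one
+// naturally-aligned 256KiB region.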
+
+class Chunk {
+ public:
+  static void* operator new(std::size_t count) noexcept;
+  static void operator delete(void* ptr);
+  Chunk(HeapImpl* heap, int bucket);
+  ~Chunk() {}
+
+  void *Alloc();
+  void Free(void* ptr);
+  void Purge();
+  bool Empty();
+
+  static Chunk* ptr_to_chunk(void* ptr) {
+    return reinterpret_cast<Chunk*>(reinterpret_cast<uintptr_t>(ptr)
+        & ~(kChunkSize - 1));
+  }
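+  // Bucket allocations live at a nonzero offset inside a kChunkSize-aligned
+  // chunk, while MapAlloc() mappings are themselves kChunkSize-aligned, so a
+  // nonzero offset means the pointer belongs to a chunk.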
+  static bool is_chunk(void* ptr) {
+    return (reinterpret_cast<uintptr_t>(ptr) & (kChunkSize - 1)) != 0;
+  }
+
+  unsigned int free_count() {
+    return free_count_;
+  }
+  HeapImpl* heap() {
+    return heap_;
+  }
+  LinkedList<Chunk*> node_; // linked list sorted by minimum free count
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Chunk);
+  HeapImpl* heap_;
+  unsigned int bucket_;
+  unsigned int allocation_size_; // size of allocations in chunk, min 8 bytes
+  unsigned int max_allocations_; // maximum number of allocations in the chunk
+  unsigned int first_free_bitmap_; // index into bitmap for first non-full entry
+  unsigned int free_count_; // number of available allocations
+  unsigned int frees_since_purge_; // number of calls to Free since last Purge
+
+  // bitmap of pages that have been dirtied
+  uint32_t dirty_pages_[div_round_up(kUsablePagesPerChunk, 32)];
+
+  // bitmap of free allocations.
+  uint32_t free_bitmap_[kUsableChunkSize / kMinBucketAllocationSize / 32];
+
+  char data_[0];
+
+  unsigned int ptr_to_n(void* ptr) {
+    ptrdiff_t offset = reinterpret_cast<uintptr_t>(ptr)
+        - reinterpret_cast<uintptr_t>(data_);
+    return offset / allocation_size_;
+  }
+  void* n_to_ptr(unsigned int n) {
+    return data_ + n * allocation_size_;
+  }
+};
+static_assert(sizeof(Chunk) <= kPageSize, "header must fit in page");
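+
+// Chunk memory layout (the chunk is kChunkSize-aligned, which is what lets
+// ptr_to_chunk() recover the header by masking low bits):
+//
+//   +--------------------+  <- 256KiB-aligned base
+//   | Chunk header       |  first 4KiB page (static_assert above)
+//   +--------------------+
+//   | data_: slots of    |  ~252KiB, max_allocations_ =
+//   | allocation_size_   |  kUsableChunkSize / allocation_size_
+//   +--------------------+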
+
+// Override new operator on chunk to use mmap to allocate kChunkSize
+void* Chunk::operator new(std::size_t count __attribute__((unused))) noexcept {
+  assert(count == sizeof(Chunk));
+  void* mem = MapAligned(kChunkSize, kChunkSize);
+  if (!mem) {
+    abort(); //throw std::bad_alloc;
+  }
+
+  return mem;
+}
+
+// Override delete operator on chunk to munmap the kChunkSize mapping
+void Chunk::operator delete(void *ptr) {
+  assert(reinterpret_cast<Chunk*>(ptr) == ptr_to_chunk(ptr));
+  munmap(ptr, kChunkSize);
+}
+
+Chunk::Chunk(HeapImpl* heap, int bucket)
+    : node_(this),
+      heap_(heap),
+      bucket_(bucket),
+      allocation_size_(bucket_to_size(bucket)),
+      max_allocations_(kUsableChunkSize / allocation_size_),
+      first_free_bitmap_(0),
+      free_count_(max_allocations_),
+      frees_since_purge_(0) {
+  memset(dirty_pages_, 0, sizeof(dirty_pages_));
+  memset(free_bitmap_, 0xff, sizeof(free_bitmap_));
+}
+
+bool Chunk::Empty() {
+  return free_count_ == max_allocations_;
+}
+
+void* Chunk::Alloc() {
+  assert(free_count_ > 0);
+
+  unsigned int i = first_free_bitmap_;
+  while (free_bitmap_[i] == 0)
+    i++;
+  assert(i < ARRAY_SIZE(free_bitmap_));
+  unsigned int bit = __builtin_ffs(free_bitmap_[i]) - 1;
+  assert(free_bitmap_[i] & (1U << bit));
+  free_bitmap_[i] &= ~(1U << bit);
+  unsigned int n = i * 32 + bit;
+  assert(n < max_allocations_);
+
+  unsigned int page = n * allocation_size_ / kPageSize;
+  assert(page / 32 < ARRAY_SIZE(dirty_pages_));
+  dirty_pages_[page / 32] |= 1U << (page % 32);
+
+  free_count_--;
+  if (free_count_ == 0) {
+    heap_->MoveToFullList(this, bucket_);
+  }
+
+  return n_to_ptr(n);
+}
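+
+// Example: the first Alloc() on a fresh 8-byte chunk scans from
+// first_free_bitmap_ == 0, finds bit 0 of free_bitmap_[0] set, so n == 0;
+// it clears the bit, marks page 0 of the data area dirty, and returns
+// data_ + 0.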
+
+void Chunk::Free(void* ptr) {
+  assert(is_chunk(ptr));
+  assert(ptr_to_chunk(ptr) == this);
+
+  unsigned int n = ptr_to_n(ptr);
+  unsigned int i = n / 32;
+  unsigned int bit = n % 32;
+
+  assert(i < ARRAY_SIZE(free_bitmap_));
+  assert(!(free_bitmap_[i] & (1U << bit)));
+  free_bitmap_[i] |= 1U << bit;
+  free_count_++;
+
+  if (i < first_free_bitmap_) {
+    first_free_bitmap_ = i;
+  }
+
+  if (free_count_ == 1) {
+    heap_->MoveToFreeList(this, bucket_);
+  } else {
+    // TODO(ccross): move down free list if necessary
+  }
+
+  if (frees_since_purge_++ * allocation_size_ > 16 * kPageSize) {
+    Purge();
+  }
+}
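+
+// The purge threshold in Free() fires after roughly 16 pages (64KiB) worth
+// of frees: every ~8192 frees for the 8-byte bucket, every ~16 frees for a
+// 4KiB bucket.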
+
+void Chunk::Purge() {
+  frees_since_purge_ = 0;
+
+  // TODO(ccross): release pages that no longer contain live allocations,
+  // e.g. with madvise(MADV_DONTNEED), using dirty_pages_ to limit the scan.
+}
+
+// Override new operator on HeapImpl to use mmap to allocate a page
+void* HeapImpl::operator new(std::size_t count __attribute__((unused)))
+    noexcept {
+  assert(count == sizeof(HeapImpl));
+  void* mem = MapAligned(kPageSize, kPageSize);
+  if (!mem) {
+    abort(); //throw std::bad_alloc;
+  }
+
+  heap_count++;
+  return mem;
+}
+
+void HeapImpl::operator delete(void *ptr) {
+  munmap(ptr, kPageSize);
+}
+
+HeapImpl::HeapImpl() :
+    free_chunks_(), full_chunks_(), map_allocation_list_(NULL) {
+}
+
+bool HeapImpl::Empty() {
+  for (unsigned int i = 0; i < kNumBuckets; i++) {
+    for (LinkedList<Chunk*> *it = free_chunks_[i].next(); it->data() != NULL; it = it->next()) {
+      if (!it->data()->Empty()) {
+        return false;
+      }
+    }
+    for (LinkedList<Chunk*> *it = full_chunks_[i].next(); it->data() != NULL; it = it->next()) {
+      if (!it->data()->Empty()) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+HeapImpl::~HeapImpl() {
+  for (unsigned int i = 0; i < kNumBuckets; i++) {
+    while (!free_chunks_[i].empty()) {
+      Chunk *chunk = free_chunks_[i].next()->data();
+      chunk->node_.remove();
+      delete chunk;
+    }
+    while (!full_chunks_[i].empty()) {
+      Chunk *chunk = full_chunks_[i].next()->data();
+      chunk->node_.remove();
+      delete chunk;
+    }
+  }
+}
+
+void* HeapImpl::Alloc(size_t size) {
+  std::lock_guard<std::mutex> lk(m_);
+  return AllocLocked(size);
+}
+
+void* HeapImpl::AllocLocked(size_t size) {
+  if (__predict_false(size > kMaxBucketAllocationSize)) {
+    return MapAlloc(size);
+  }
+  int bucket = size_to_bucket(size);
+  if (__predict_false(free_chunks_[bucket].empty())) {
+    Chunk *chunk = new Chunk(this, bucket);
+    free_chunks_[bucket].insert(chunk->node_);
+  }
+  return free_chunks_[bucket].next()->data()->Alloc();
+}
+
+void HeapImpl::Free(void *ptr) {
+  std::lock_guard<std::mutex> lk(m_);
+  FreeLocked(ptr);
+}
+
+void HeapImpl::FreeLocked(void *ptr) {
+  if (!Chunk::is_chunk(ptr)) {
+    HeapImpl::MapFree(ptr);
+  } else {
+    Chunk* chunk = Chunk::ptr_to_chunk(ptr);
+    assert(chunk->heap() == this);
+    chunk->Free(ptr);
+  }
+}
+
+void* HeapImpl::MapAlloc(size_t size) {
+  size = (size + kPageSize - 1) & ~(kPageSize - 1);
+
+  MapAllocation* allocation = reinterpret_cast<MapAllocation*>(AllocLocked(
+      sizeof(MapAllocation)));
+  void* ptr = MapAligned(size, kChunkSize);
+  if (!ptr) {
+    FreeLocked(allocation);
+    abort(); //throw std::bad_alloc;
+  }
+  allocation->ptr = ptr;
+  allocation->size = size;
+  allocation->next = map_allocation_list_;
+  map_allocation_list_ = allocation;
+
+  return ptr;
+}
+
+void HeapImpl::MapFree(void *ptr) {
+  // Walk the list with a pointer-to-pointer so the matching node can be
+  // unlinked in place without tracking a separate previous node.
+  MapAllocation **allocation = &map_allocation_list_;
+  while (*allocation && (*allocation)->ptr != ptr)
+    allocation = &(*allocation)->next;
+
+  assert(*allocation != nullptr);
+
+  // Unlink the node before returning it to the heap so the list never
+  // references freed memory.
+  MapAllocation *to_free = *allocation;
+  *allocation = to_free->next;
+
+  munmap(to_free->ptr, to_free->size);
+  FreeLocked(to_free);
+}
+
+void HeapImpl::MoveToFreeList(Chunk *chunk, int bucket) {
+  MoveToList(chunk, &free_chunks_[bucket]);
+}
+
+void HeapImpl::MoveToFullList(Chunk *chunk, int bucket) {
+  MoveToList(chunk, &full_chunks_[bucket]);
+}
+
+void HeapImpl::MoveToList(Chunk *chunk, LinkedList<Chunk*>* head) {
+  // Remove from old list
+  chunk->node_.remove();
+
+  // Insert into the new list, keeping it sorted by lowest free count. head
+  // is a sentinel whose data() is nullptr, so compare against each node
+  // after it and insert after the last node with a smaller free count.
+  LinkedList<Chunk*> *node = head;
+  while (node->next() != head
+      && node->next()->data()->free_count() < chunk->free_count())
+    node = node->next();
+
+  node->insert(chunk->node_);
+}
+
+Heap::Heap() {
+  // HeapImpl overrides operator new to mmap itself instead of allocating
+  // through the global heap.
+  // Can't use a shared_ptr to store the result because shared_ptr needs to
+  // allocate, and Allocator<T> is still being constructed.
+  impl_ = new HeapImpl();
+  owns_impl_ = true;
+}
+
+Heap::~Heap() {
+  if (owns_impl_) {
+    delete impl_;
+  }
+}
+
+void* Heap::allocate(size_t size) {
+  return impl_->Alloc(size);
+}
+
+void Heap::deallocate(void* ptr) {
+  impl_->Free(ptr);
+}
+
+void Heap::deallocate(HeapImpl* impl, void* ptr) {
+  impl->Free(ptr);
+}
+
+bool Heap::empty() {
+  return impl_->Empty();
+}