On heap tracking data structure overflow, degrade instead of CHECK()

Based on ajwong's CL https://codereview.chromium.org/2784783003/
This allows the heap profiler to keep working on systems with heavy memory
usage (sometimes caused by strange profiles).

The next step is to surface the overflow in the tracing UI.
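
The sketch below illustrates the new contract in isolation: when a
fixed-capacity table is full, Insert() refuses the entry and reports the drop
instead of CHECK()-crashing into a guard page. It is a minimal, hypothetical
example (BoundedMap and its std::unordered_map backing are illustrative
stand-ins, not the real FixedHashMap):

// Sketch only: "refuse the insert and count it" instead of aborting when a
// fixed-capacity table is full.
#include <cstddef>
#include <unordered_map>
#include <utility>

template <typename K, typename V>
class BoundedMap {
 public:
  explicit BoundedMap(size_t capacity) : capacity_(capacity) {}

  // Returns {iterator, inserted}. A full table yields {end(), false}; the
  // process keeps running and the drop is counted.
  std::pair<typename std::unordered_map<K, V>::iterator, bool> Insert(
      const K& key, const V& value) {
    auto it = map_.find(key);
    if (it != map_.end())
      return {it, false};  // Already present; caller may overwrite the value.
    if (map_.size() >= capacity_) {
      ++num_inserts_dropped_;  // Degrade gracefully instead of aborting.
      return {map_.end(), false};
    }
    return map_.emplace(key, value);
  }

  size_t num_inserts_dropped() const { return num_inserts_dropped_; }

 private:
  const size_t capacity_;
  size_t num_inserts_dropped_ = 0;
  std::unordered_map<K, V> map_;
};

FixedHashMap::Insert() in the diff below applies the same idea, returning
{kInvalidKVIndex, false} and bumping num_inserts_dropped_ when GetFreeCell()
comes back empty.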

BUG=698079

Review-Url: https://codereview.chromium.org/2787383002
Cr-Commit-Position: refs/heads/master@{#461485}


CrOS-Libchrome-Original-Commit: c1d887e1b41c37348a412bcb956b641ef5b5caf1
diff --git a/base/trace_event/heap_profiler_allocation_register.cc b/base/trace_event/heap_profiler_allocation_register.cc
index 63d4061..1cfb983 100644
--- a/base/trace_event/heap_profiler_allocation_register.cc
+++ b/base/trace_event/heap_profiler_allocation_register.cc
@@ -5,6 +5,7 @@
 #include "base/trace_event/heap_profiler_allocation_register.h"
 
 #include <algorithm>
+#include <limits>
 
 #include "base/trace_event/trace_event_memory_overhead.h"
 
@@ -12,9 +13,9 @@
 namespace trace_event {
 
 AllocationRegister::ConstIterator::ConstIterator(
-    const AllocationRegister& alloc_register, AllocationIndex index)
-    : register_(alloc_register),
-      index_(index) {}
+    const AllocationRegister& alloc_register,
+    AllocationIndex index)
+    : register_(alloc_register), index_(index) {}
 
 void AllocationRegister::ConstIterator::operator++() {
   index_ = register_.allocations_.Next(index_ + 1);
@@ -25,12 +26,12 @@
   return index_ != other.index_;
 }
 
-AllocationRegister::Allocation
-AllocationRegister::ConstIterator::operator*() const {
+AllocationRegister::Allocation AllocationRegister::ConstIterator::operator*()
+    const {
   return register_.GetAllocation(index_);
 }
 
-size_t AllocationRegister::BacktraceHasher::operator () (
+size_t AllocationRegister::BacktraceHasher::operator()(
     const Backtrace& backtrace) const {
   const size_t kSampleLength = 10;
 
@@ -42,7 +43,7 @@
   }
 
   size_t tail_start = backtrace.frame_count -
-      std::min(backtrace.frame_count - head_end, kSampleLength);
+                      std::min(backtrace.frame_count - head_end, kSampleLength);
   for (size_t i = tail_start; i != backtrace.frame_count; ++i) {
     total_value += reinterpret_cast<uintptr_t>(backtrace.frames[i].value);
   }
@@ -55,7 +56,7 @@
   return (total_value * 131101) >> 14;
 }
 
-size_t AllocationRegister::AddressHasher::operator () (
+size_t AllocationRegister::AddressHasher::operator()(
     const void* address) const {
   // The multiplicative hashing scheme from [Knuth 1998]. The value of |a| has
   // been chosen carefully based on measurements with real-word data (addresses
@@ -75,34 +76,48 @@
 
 AllocationRegister::AllocationRegister(size_t allocation_capacity,
                                        size_t backtrace_capacity)
-    : allocations_(allocation_capacity),
-      backtraces_(backtrace_capacity) {}
+    : allocations_(allocation_capacity), backtraces_(backtrace_capacity) {
+  Backtrace sentinel = {};
+  sentinel.frames[0] = StackFrame::FromThreadName("[out of heap profiler mem]");
+  sentinel.frame_count = 1;
 
-AllocationRegister::~AllocationRegister() {
+  // Rationale for max / 2: in theory we could just start the sentinel with a
+  // refcount == 0. However, this optimization avoids hitting the second
+  // condition of the "if" in RemoveBacktrace, reducing the chances of hurting
+  // the fast path. From a functional viewpoint, the sentinel is safe even if
+  // the refcount wraps around.
+  BacktraceMap::KVPair::second_type sentinel_refcount =
+      std::numeric_limits<BacktraceMap::KVPair::second_type>::max() / 2;
+  auto index_and_flag = backtraces_.Insert(sentinel, sentinel_refcount);
+  DCHECK(index_and_flag.second);
+  out_of_storage_backtrace_index_ = index_and_flag.first;
 }
 
-void AllocationRegister::Insert(const void* address,
+AllocationRegister::~AllocationRegister() {}
+
+bool AllocationRegister::Insert(const void* address,
                                 size_t size,
                                 const AllocationContext& context) {
   DCHECK(address != nullptr);
   if (size == 0) {
-    return;
+    return false;
   }
 
-  AllocationInfo info = {
-      size,
-      context.type_name,
-      InsertBacktrace(context.backtrace)
-  };
+  AllocationInfo info = {size, context.type_name,
+                         InsertBacktrace(context.backtrace)};
 
   // Try to insert the allocation.
   auto index_and_flag = allocations_.Insert(address, info);
-  if (!index_and_flag.second) {
+  if (!index_and_flag.second &&
+      index_and_flag.first != AllocationMap::kInvalidKVIndex) {
     // |address| is already there - overwrite the allocation info.
     auto& old_info = allocations_.Get(index_and_flag.first).second;
     RemoveBacktrace(old_info.backtrace_index);
     old_info = info;
+    return true;
   }
+
+  return index_and_flag.second;
 }
 
 void AllocationRegister::Remove(const void* address) {
@@ -140,15 +155,17 @@
 void AllocationRegister::EstimateTraceMemoryOverhead(
     TraceEventMemoryOverhead* overhead) const {
   size_t allocated = sizeof(AllocationRegister);
-  size_t resident = sizeof(AllocationRegister)
-                    + allocations_.EstimateUsedMemory()
-                    + backtraces_.EstimateUsedMemory();
+  size_t resident = sizeof(AllocationRegister) +
+                    allocations_.EstimateUsedMemory() +
+                    backtraces_.EstimateUsedMemory();
   overhead->Add("AllocationRegister", allocated, resident);
 }
 
 AllocationRegister::BacktraceMap::KVIndex AllocationRegister::InsertBacktrace(
     const Backtrace& backtrace) {
   auto index = backtraces_.Insert(backtrace, 0).first;
+  if (index == BacktraceMap::kInvalidKVIndex)
+    return out_of_storage_backtrace_index_;
   auto& backtrace_and_count = backtraces_.Get(index);
   backtrace_and_count.second++;
   return index;
@@ -156,7 +173,8 @@
 
 void AllocationRegister::RemoveBacktrace(BacktraceMap::KVIndex index) {
   auto& backtrace_and_count = backtraces_.Get(index);
-  if (--backtrace_and_count.second == 0) {
+  if (--backtrace_and_count.second == 0 &&
+      index != out_of_storage_backtrace_index_) {
     // Backtrace is not referenced anymore - remove it.
     backtraces_.Remove(index);
   }
@@ -165,15 +183,11 @@
 AllocationRegister::Allocation AllocationRegister::GetAllocation(
     AllocationMap::KVIndex index) const {
   const auto& address_and_info = allocations_.Get(index);
-  const auto& backtrace_and_count = backtraces_.Get(
-      address_and_info.second.backtrace_index);
-  return {
-      address_and_info.first,
-      address_and_info.second.size,
-      AllocationContext(
-          backtrace_and_count.first,
-          address_and_info.second.type_name)
-  };
+  const auto& backtrace_and_count =
+      backtraces_.Get(address_and_info.second.backtrace_index);
+  return {address_and_info.first, address_and_info.second.size,
+          AllocationContext(backtrace_and_count.first,
+                            address_and_info.second.type_name)};
 }
 
 }  // namespace trace_event
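
The subtlest part of the .cc change is the pre-inserted sentinel backtrace:
when backtraces_ is full, InsertBacktrace() hands out
out_of_storage_backtrace_index_ instead of failing, and the max/2 refcount
keeps RemoveBacktrace() from ever recycling that slot. A stripped-down,
hypothetical sketch of the same idea (BacktraceTable, kCapacity and the
std::vector backing are illustrative, not the real BacktraceMap API):

// Sketch only: a toy refcounted table showing the sentinel-fallback idea.
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

class BacktraceTable {
 public:
  BacktraceTable() {
    // Pre-insert the sentinel with refcount max/2 (as the CL does) so that
    // Remove() decrements never plausibly reach zero for it.
    refcounts_.push_back(std::numeric_limits<uint32_t>::max() / 2);
    sentinel_index_ = 0;
  }

  // Returns the index of the inserted entry; on overflow, hands back the
  // sentinel index instead of crashing or returning an invalid index.
  size_t Insert() {
    if (!free_slots_.empty()) {
      size_t index = free_slots_.back();
      free_slots_.pop_back();
      refcounts_[index] = 1;
      return index;
    }
    if (refcounts_.size() >= kCapacity)
      return sentinel_index_;  // Out of storage: fall back to the sentinel.
    refcounts_.push_back(1);
    return refcounts_.size() - 1;
  }

  void Remove(size_t index) {
    // Mirrors RemoveBacktrace(): a slot is recycled only when its refcount
    // hits zero *and* it is not the sentinel, so the sentinel always survives.
    if (--refcounts_[index] == 0 && index != sentinel_index_)
      free_slots_.push_back(index);
  }

 private:
  static constexpr size_t kCapacity = 8;
  size_t sentinel_index_;
  std::vector<uint32_t> refcounts_;
  std::vector<size_t> free_slots_;
};

Because the sentinel's only frame is the fake "[out of heap profiler mem]"
stack frame, allocations recorded after overflow stay visible in the trace
under that marker, which is what the follow-up UI work can key off.
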
diff --git a/base/trace_event/heap_profiler_allocation_register.h b/base/trace_event/heap_profiler_allocation_register.h
index d6a02fa..6d63923 100644
--- a/base/trace_event/heap_profiler_allocation_register.h
+++ b/base/trace_event/heap_profiler_allocation_register.h
@@ -48,24 +48,26 @@
   // For implementation simplicity API uses integer index instead
   // of iterators. Most operations (except Find) on KVIndex are O(1).
   using KVIndex = size_t;
-  static const KVIndex kInvalidKVIndex = static_cast<KVIndex>(-1);
+  enum : KVIndex { kInvalidKVIndex = static_cast<KVIndex>(-1) };
 
   // Capacity controls how many items this hash map can hold, and largely
   // affects memory footprint.
   FixedHashMap(size_t capacity)
-    : num_cells_(capacity),
-      cells_(static_cast<Cell*>(
-          AllocateGuardedVirtualMemory(num_cells_ * sizeof(Cell)))),
-      buckets_(static_cast<Bucket*>(
-          AllocateGuardedVirtualMemory(NumBuckets * sizeof(Bucket)))),
-      free_list_(nullptr),
-      next_unused_cell_(0) {}
+      : num_cells_(capacity),
+        num_inserts_dropped_(0),
+        cells_(static_cast<Cell*>(
+            AllocateGuardedVirtualMemory(num_cells_ * sizeof(Cell)))),
+        buckets_(static_cast<Bucket*>(
+            AllocateGuardedVirtualMemory(NumBuckets * sizeof(Bucket)))),
+        free_list_(nullptr),
+        next_unused_cell_(0) {}
 
   ~FixedHashMap() {
     FreeGuardedVirtualMemory(cells_, num_cells_ * sizeof(Cell));
     FreeGuardedVirtualMemory(buckets_, NumBuckets * sizeof(Bucket));
   }
 
+  // Returns {kInvalidKVIndex, false} if the table is full.
   std::pair<KVIndex, bool> Insert(const Key& key, const Value& value) {
     Cell** p_cell = Lookup(key);
     Cell* cell = *p_cell;
@@ -74,7 +76,15 @@
     }
 
     // Get a free cell and link it.
-    *p_cell = cell = GetFreeCell();
+    cell = GetFreeCell();
+    if (!cell) {
+      if (num_inserts_dropped_ <
+          std::numeric_limits<decltype(num_inserts_dropped_)>::max()) {
+        ++num_inserts_dropped_;
+      }
+      return {kInvalidKVIndex, false};
+    }
+    *p_cell = cell;
     cell->p_prev = p_cell;
     cell->next = nullptr;
 
@@ -137,6 +147,8 @@
            bits::Align(sizeof(Bucket) * NumBuckets, page_size);
   }
 
+  size_t num_inserts_dropped() const { return num_inserts_dropped_; }
+
  private:
   friend base::trace_event::AllocationRegisterTest;
 
@@ -175,7 +187,8 @@
   }
 
   // Returns a cell that is not being used to store an entry (either by
-  // recycling from the free list or by taking a fresh cell).
+  // recycling from the free list or by taking a fresh cell). May return
+  // nullptr if the hash table has run out of memory.
   Cell* GetFreeCell() {
     // First try to re-use a cell from the free list.
     if (free_list_) {
@@ -184,26 +197,14 @@
       return cell;
     }
 
-    // Otherwise pick the next cell that has not been touched before.
-    size_t idx = next_unused_cell_;
-    next_unused_cell_++;
-
     // If the hash table has too little capacity (when too little address space
-    // was reserved for |cells_|), |next_unused_cell_| can be an index outside
-    // of the allocated storage. A guard page is allocated there to crash the
-    // program in that case. There are alternative solutions:
-    // - Deal with it, increase capacity by reallocating |cells_|.
-    // - Refuse to insert and let the caller deal with it.
-    // Because free cells are re-used before accessing fresh cells with a higher
-    // index, and because reserving address space without touching it is cheap,
-    // the simplest solution is to just allocate a humongous chunk of address
-    // space.
+    // was reserved for |cells_|), return nullptr.
+    if (next_unused_cell_ >= num_cells_) {
+      return nullptr;
+    }
 
-    CHECK_LT(next_unused_cell_, num_cells_ + 1)
-        << "Allocation Register hash table has too little capacity. Increase "
-           "the capacity to run heap profiler in large sessions.";
-
-    return &cells_[idx];
+    // Otherwise pick the next cell that has not been touched before.
+    return &cells_[next_unused_cell_++];
   }
 
   // Returns a value in the range [0, NumBuckets - 1] (inclusive).
@@ -219,6 +220,9 @@
   // Number of cells.
   size_t const num_cells_;
 
+  // Number of calls to Insert() that were lost because the hashtable was full.
+  size_t num_inserts_dropped_;
+
   // The array of cells. This array is backed by mmapped memory. Lower indices
   // are accessed first, higher indices are accessed only when the |free_list_|
   // is empty. This is to minimize the amount of resident memory used.
@@ -282,7 +286,10 @@
 
   // Inserts allocation details into the table. If the address was present
   // already, its details are updated. |address| must not be null.
-  void Insert(const void* address,
+  //
+  // Returns true if an insert occurred. Inserts may fail because the table
+  // is full.
+  bool Insert(const void* address,
               size_t size,
               const AllocationContext& context);
 
@@ -359,6 +366,9 @@
   AllocationMap allocations_;
   BacktraceMap backtraces_;
 
+  // Sentinel used when we run out of backtraces_ storage.
+  BacktraceMap::KVIndex out_of_storage_backtrace_index_;
+
   DISALLOW_COPY_AND_ASSIGN(AllocationRegister);
 };
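
On the header side, the guard-page CHECK in GetFreeCell() is replaced by a
nullptr return plus a saturating num_inserts_dropped_ counter. A condensed,
hypothetical sketch of that behavior (CellPool is illustrative, and it folds
the counter bump into the pool for brevity, whereas the CL increments it in
Insert()):

// Sketch only: a fixed cell pool that refuses requests once exhausted.
#include <cstddef>
#include <limits>
#include <vector>

struct Cell {
  int payload = 0;
};

class CellPool {
 public:
  explicit CellPool(size_t capacity) : cells_(capacity) {}

  // Returns nullptr when the pool is exhausted instead of walking into a
  // guard page; the caller is expected to drop the insert.
  Cell* GetFreeCell() {
    if (next_unused_cell_ >= cells_.size()) {
      // Saturating count of refused inserts.
      if (num_inserts_dropped_ < std::numeric_limits<size_t>::max())
        ++num_inserts_dropped_;
      return nullptr;
    }
    return &cells_[next_unused_cell_++];
  }

  size_t num_inserts_dropped() const { return num_inserts_dropped_; }

 private:
  std::vector<Cell> cells_;
  size_t next_unused_cell_ = 0;
  size_t num_inserts_dropped_ = 0;
};

Saturating rather than wrapping keeps the reported drop count meaningful even
after billions of refused inserts.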
 
diff --git a/base/trace_event/heap_profiler_allocation_register_unittest.cc b/base/trace_event/heap_profiler_allocation_register_unittest.cc
index 7eee61a..950bab3 100644
--- a/base/trace_event/heap_profiler_allocation_register_unittest.cc
+++ b/base/trace_event/heap_profiler_allocation_register_unittest.cc
@@ -245,24 +245,59 @@
   EXPECT_FALSE(reg.Get(reinterpret_cast<void*>(19), &a));
 }
 
-// Check that the process aborts due to hitting the guard page when inserting
-// too many elements.
-#if GTEST_HAS_DEATH_TEST
-TEST_F(AllocationRegisterTest, OverflowDeathTest) {
+// Check that the table handles overflows of the allocation storage gracefully.
+TEST_F(AllocationRegisterTest, OverflowAllocationTest) {
   const size_t allocation_capacity = GetAllocationCapacityPerPage();
   AllocationRegister reg(allocation_capacity, kBacktraceCapacity);
   AllocationContext ctx;
   size_t i;
 
-  // Fill up all of the memory allocated for the register's allocation map.
-  for (i = 0; i < allocation_capacity; i++) {
-    reg.Insert(reinterpret_cast<void*>(i + 1), 1, ctx);
+  for (int repetition = 0; repetition < 3; repetition++) {
+    // Fill up all of the memory allocated for the register's allocation map.
+    for (i = 0; i < allocation_capacity; i++)
+      ASSERT_TRUE(reg.Insert(reinterpret_cast<void*>(i + 1), 1, ctx));
+
+    // Adding just one extra element should cause overflow.
+    ASSERT_FALSE(reg.Insert(reinterpret_cast<void*>(i + 1), 1, ctx));
+
+    // Removing all allocations shouldn't cause any crash.
+    for (i = 0; i < allocation_capacity; i++) {
+      reg.Remove(reinterpret_cast<void*>(i + 1));
+    }
+  }
+}
+
+// Check that the table handles overflows of the backtrace storage (but not the
+// allocations storage) gracefully.
+TEST_F(AllocationRegisterTest, OverflowBacktraceTest) {
+  const size_t backtrace_capacity = GetAllocationCapacityPerPage();
+  const size_t allocation_capacity = 3 * GetAllocationCapacityPerPage();
+  AllocationRegister reg(allocation_capacity, backtrace_capacity);
+  AllocationContext ctx;
+  size_t i;
+
+  // Fill up all of the memory allocated for the backtrace allocation map,
+  // but do not fill the allocations map.
+  for (i = 1; i <= backtrace_capacity * 2; i++) {
+    void* addr = reinterpret_cast<void*>(i);
+    ctx.backtrace.frames[0] = StackFrame::FromProgramCounter(addr);
+    ctx.backtrace.frame_count = 1;
+    ASSERT_TRUE(reg.Insert(addr, 1, ctx));
   }
 
-  // Adding just one extra element should cause overflow.
-  ASSERT_DEATH(reg.Insert(reinterpret_cast<void*>(i + 1), 1, ctx), "");
+  // Removing all allocations shouldn't cause any crash.
+  for (i = 1; i <= backtrace_capacity * 2; i++) {
+    AllocationRegister::Allocation allocation = {};
+    ASSERT_TRUE(reg.Get(reinterpret_cast<void*>(i), &allocation));
+
+    // This is just to check the integrity of the backtrace sentinel and to make
+    // sure that we don't hit the guard page.
+    ASSERT_LT(allocation.context.backtrace.frame_count,
+              static_cast<size_t>(Backtrace::kMaxFrameCount));
+
+    reg.Remove(reinterpret_cast<void*>(i));
+  }
 }
-#endif
 
 }  // namespace trace_event
 }  // namespace base