profiler: decouple ThreadHeapUsageTracker from allocator shim internals

Background:
-----------
Until crrev.com/2658723007, the allocator shim was initialized at
compile time and hence was always ready to use. Since that CL, the
shim requires run-time initialization. The code in
ThreadHeapUsageTracker, however, currently assumes that the shim is
initialized: it calls directly into the shim chain even before any
malloc/free call has been intercepted (see the sketch after the list
below).
There are two possible solutions to this:
1. Initialize the shim before ThreadHeapUsageTracker is initialized.
2. Prevent ThreadHeapUsageTracker from directly entering the shim.
(more discussion in https://codereview.chromium.org/2658723007/#msg53).
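
For context, the pattern in question (visible in the removed lines of
the diff below) walks the dispatch chain directly, which is only safe
once the shim has been initialized:

  // Simplified from the code this CL removes (see the diff below).
  const AllocatorDispatch* next = allocator_dispatch.next;
  ThreadHeapUsage* allocator_usage =
      new (next->alloc_function(next, sizeof(ThreadHeapUsage)))
          ThreadHeapUsage();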

Option 1 is a one-liner, but not particularly clean.
ThreadHeapUsageTracker is lazily initialized when a Thread (the main
thread) is created (during TrackedObject construction). In practice
this would mean putting the shim initialization in tracked_objects.cc,
which is quite obscure.

This CL:
--------
The approach used here is 2. The existing code already had the right
sentinel logic to detect and prevent re-entrancy; this CL extends it
to the destructor and uses plain new/delete to create the TLS object
(see the sketch below). This allows the shim to be initialized in a
less obscure place (e.g. content_main_runner).
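
In outline, the sentinel pattern works as follows (a minimal sketch
with simplified control flow, not the exact tracker code; the real
version uses a masked compare, see the diff below):

  ThreadHeapUsage* GetOrCreateThreadUsage() {
    void* tls = g_thread_allocator_usage.Get();
    if (tls == kInitializationSentinel || tls == kTeardownSentinel)
      return nullptr;  // Re-entrant call from the shim: bail out.
    if (tls == nullptr) {
      // Guard the allocation below, which itself re-enters the shim.
      g_thread_allocator_usage.Set(kInitializationSentinel);
      tls = new ThreadHeapUsage();
      g_thread_allocator_usage.Set(tls);
    }
    return static_cast<ThreadHeapUsage*>(tls);
  }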

BUG=665567,644385
TEST=Build with use_experimental_allocator_shim=true on Mac, run base_unittests

Review-Url: https://codereview.chromium.org/2675433002
Cr-Commit-Position: refs/heads/master@{#448360}


CrOS-Libchrome-Original-Commit: e9ffc9c6104f4cc7ff5e62a94869f111c645106a
diff --git a/base/debug/thread_heap_usage_tracker.cc b/base/debug/thread_heap_usage_tracker.cc
index 3761cf9..c8a0ba4 100644
--- a/base/debug/thread_heap_usage_tracker.cc
+++ b/base/debug/thread_heap_usage_tracker.cc
@@ -6,6 +6,7 @@
 
 #include <stdint.h>
 #include <algorithm>
+#include <limits>
 #include <new>
 #include <type_traits>
 
@@ -30,8 +31,11 @@
 
 ThreadLocalStorage::StaticSlot g_thread_allocator_usage = TLS_INITIALIZER;
 
-ThreadHeapUsage* const kInitializingSentinel =
-    reinterpret_cast<ThreadHeapUsage*>(-1);
+const uintptr_t kSentinelMask = std::numeric_limits<uintptr_t>::max() - 1;
+ThreadHeapUsage* const kInitializationSentinel =
+    reinterpret_cast<ThreadHeapUsage*>(kSentinelMask);
+ThreadHeapUsage* const kTeardownSentinel =
+    reinterpret_cast<ThreadHeapUsage*>(kSentinelMask | 1);
 
 bool g_heap_tracking_enabled = false;
 
@@ -172,21 +176,16 @@
                                         nullptr};
 
 ThreadHeapUsage* GetOrCreateThreadUsage() {
-  ThreadHeapUsage* allocator_usage =
-      static_cast<ThreadHeapUsage*>(g_thread_allocator_usage.Get());
-  if (allocator_usage == kInitializingSentinel)
+  auto tls_ptr = reinterpret_cast<uintptr_t>(g_thread_allocator_usage.Get());
+  if ((tls_ptr & kSentinelMask) == kSentinelMask)
     return nullptr;  // Re-entrancy case.
 
+  auto* allocator_usage = reinterpret_cast<ThreadHeapUsage*>(tls_ptr);
   if (allocator_usage == nullptr) {
     // Prevent reentrancy due to the allocation below.
-    g_thread_allocator_usage.Set(kInitializingSentinel);
+    g_thread_allocator_usage.Set(kInitializationSentinel);
 
-    // Delegate the allocation of the per-thread structure to the underlying
-    // heap shim, for symmetry with the deallocation. Otherwise interposing
-    // shims may mis-attribute or mis-direct this allocation.
-    const AllocatorDispatch* next = allocator_dispatch.next;
-    allocator_usage = new (next->alloc_function(next, sizeof(ThreadHeapUsage)))
-        ThreadHeapUsage();
+    allocator_usage = new ThreadHeapUsage();
     static_assert(std::is_pod<ThreadHeapUsage>::value,
                   "AllocatorDispatch must be POD");
     memset(allocator_usage, 0, sizeof(*allocator_usage));
@@ -297,12 +296,22 @@
 
 void ThreadHeapUsageTracker::EnsureTLSInitialized() {
   if (!g_thread_allocator_usage.initialized()) {
-    g_thread_allocator_usage.Initialize([](void* allocator_usage) {
-      // Delegate the freeing of the per-thread structure to the next-lower
-      // heap shim. Otherwise this free will re-initialize the TLS on thread
-      // exit.
-      allocator_dispatch.next->free_function(allocator_dispatch.next,
-                                             allocator_usage);
+    g_thread_allocator_usage.Initialize([](void* thread_heap_usage) {
+      // This destructor will be called twice: once to destroy the actual
+      // ThreadHeapUsage instance and a second time, immediately after, for the
+      // sentinel. Re-setting the TLS slot (below) does re-initialize the TLS
+      // slot. The ThreadLocalStorage code is designed to deal with this use
+      // case (see comments in the ThreadLocalStorage implementation) and will
+      // re-call the destructor with kTeardownSentinel as its argument.
+      if (thread_heap_usage == kTeardownSentinel)
+        return;
+      DCHECK(thread_heap_usage != kInitializationSentinel);
+
+      // Deleting the ThreadHeapUsage TLS object will re-enter the shim and hit
+      // RecordFree() above. The sentinel prevents RecordFree() from re-creating
+      // another ThreadHeapUsage object.
+      g_thread_allocator_usage.Set(kTeardownSentinel);
+      delete static_cast<ThreadHeapUsage*>(thread_heap_usage);
     });
   }
 }
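
A note on the sentinel encoding above: the two sentinels differ only
in their lowest bit, so the single masked compare in
GetOrCreateThreadUsage() recognizes either of them. A small standalone
sketch of the trick (assuming only that no real heap pointer, and not
nullptr either, ever has all of its upper bits set):

  #include <assert.h>
  #include <stdint.h>
  #include <limits>

  int main() {
    const uintptr_t kSentinelMask = std::numeric_limits<uintptr_t>::max() - 1;
    const uintptr_t kInitializationSentinel = kSentinelMask;  // 0xFF..FE
    const uintptr_t kTeardownSentinel = kSentinelMask | 1;    // 0xFF..FF

    // Both sentinels, and only the sentinels, satisfy the masked compare.
    assert((kInitializationSentinel & kSentinelMask) == kSentinelMask);
    assert((kTeardownSentinel & kSentinelMask) == kSentinelMask);

    // Ordinary values (a heap-like address, nullptr) never do.
    const uintptr_t heap_like = 0x1000;
    assert((heap_like & kSentinelMask) != kSentinelMask);
    assert((uintptr_t{0} & kSentinelMask) != kSentinelMask);
    return 0;
  }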