heapprofd_client: avoid destruction re-entrancy issues, and global destructor
(1) use unhooked malloc functions for the g_client shared_ptr.
(2) wrap g_client in a base::NoDestructor to avoid the global destructor
(and therefore the associated at-exit issues).
See bug & the contents of the patch for a detailed description of the issue.
Plus new e2e test that would've failed without this fix.
Bug: 129749217
Change-Id: I3d1c34ae133e9b9a43d2a6b6be2ceddbc90ba307
diff --git a/src/profiling/memory/client.cc b/src/profiling/memory/client.cc
index 161499b..eba68db 100644
--- a/src/profiling/memory/client.cc
+++ b/src/profiling/memory/client.cc
@@ -116,7 +116,9 @@
}
// static
-std::shared_ptr<Client> Client::CreateAndHandshake(base::UnixSocketRaw sock) {
+std::shared_ptr<Client> Client::CreateAndHandshake(
+ base::UnixSocketRaw sock,
+ UnhookedAllocator<Client> unhooked_allocator) {
if (!sock) {
PERFETTO_DFATAL("Socket not connected.");
return nullptr;
@@ -197,9 +199,10 @@
PERFETTO_DCHECK(client_config.interval >= 1);
Sampler sampler{client_config.interval};
- return std::make_shared<Client>(std::move(sock), client_config,
- std::move(shmem.value()), std::move(sampler),
- FindMainThreadStack());
+ // note: the shared_ptr will retain a copy of the unhooked_allocator
+ return std::allocate_shared<Client>(
+ unhooked_allocator, std::move(sock), client_config,
+ std::move(shmem.value()), std::move(sampler), FindMainThreadStack());
}
Client::Client(base::UnixSocketRaw sock,
diff --git a/src/profiling/memory/client.h b/src/profiling/memory/client.h
index 9de433c..f39af55 100644
--- a/src/profiling/memory/client.h
+++ b/src/profiling/memory/client.h
@@ -27,6 +27,7 @@
#include "perfetto/base/unix_socket.h"
#include "src/profiling/memory/sampler.h"
#include "src/profiling/memory/shared_ring_buffer.h"
+#include "src/profiling/memory/unhooked_allocator.h"
#include "src/profiling/memory/wire_protocol.h"
namespace perfetto {
@@ -43,15 +44,23 @@
//
// Methods of this class are thread-safe unless otherwise stated, in which case
// the caller needs to synchronize calls behind a mutex or similar.
+//
+// Implementation warning: this class should not use any heap, as otherwise its
+// destruction would enter the possibly-hooked |free|, which can reference the
+// Client itself. If avoiding the heap is not possible, then look at using
+// UnhookedAllocator.
class Client {
public:
// Returns a client that is ready for sampling allocations, using the given
// socket (which should already be connected to heapprofd).
//
// Returns a shared_ptr since that is how the client will ultimately be used,
- // and to take advantage of std::make_shared putting the object & the control
- // block in one block of memory.
- static std::shared_ptr<Client> CreateAndHandshake(base::UnixSocketRaw sock);
+ // and to take advantage of std::allocate_shared putting the object & the
+ // control block in one block of memory.
+ static std::shared_ptr<Client> CreateAndHandshake(
+ base::UnixSocketRaw sock,
+ UnhookedAllocator<Client> unhooked_allocator);
+
static base::Optional<base::UnixSocketRaw> ConnectToHeapprofd(
const std::string& sock_name);
@@ -72,8 +81,8 @@
return sampler_.SampleSize(alloc_size);
}
- // Public for std::make_shared. Use CreateAndHandshake() to create instances
- // instead.
+ // Public for std::allocate_shared. Use CreateAndHandshake() to create
+ // instances instead.
Client(base::UnixSocketRaw sock,
ClientConfiguration client_config,
SharedRingBuffer shmem,
diff --git a/src/profiling/memory/heapprofd_end_to_end_test.cc b/src/profiling/memory/heapprofd_end_to_end_test.cc
index 59352f1..525d132 100644
--- a/src/profiling/memory/heapprofd_end_to_end_test.cc
+++ b/src/profiling/memory/heapprofd_end_to_end_test.cc
@@ -27,6 +27,8 @@
#include <sys/system_properties.h>
#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
// This test only works when run on Android using an Android Q version of
// Bionic.
@@ -46,8 +48,8 @@
constexpr useconds_t kMsToUs = 1000;
-using ::testing::Eq;
using ::testing::AnyOf;
+using ::testing::Eq;
class HeapprofdDelegate : public ThreadDelegate {
public:
@@ -779,9 +781,8 @@
ASSERT_EQ(read(*ack_pipe.rd, buf, sizeof(buf)), 0);
ack_pipe.rd.reset();
- // TODO(rsavitski): this sleep is to compensate for the heapprofd delaying
- // in closing the sockets (and therefore the client noticing that the
- // session is over). Clarify where the delays are coming from.
+ // A brief sleep to allow the client to notice that the profiling session is
+ // to be torn down (as it rejects concurrent sessions).
usleep(100 * kMsToUs);
PERFETTO_LOG("HeapprofdEndToEnd::Reinit: Starting second");
@@ -845,6 +846,93 @@
PERFETTO_CHECK(kill(pid, SIGKILL) == 0);
PERFETTO_CHECK(PERFETTO_EINTR(waitpid(pid, nullptr, 0)) == pid);
}
+
+ // TODO(rsavitski): fold exit status assertions into existing tests where
+ // possible.
+ void NativeProfilingActiveAtProcessExit() {
+ constexpr uint64_t kTestAllocSize = 128;
+ base::Pipe start_pipe = base::Pipe::Create(base::Pipe::kBothBlock);
+
+ pid_t pid = fork();
+ if (pid == 0) { // child
+ start_pipe.rd.reset();
+ start_pipe.wr.reset();
+ for (int i = 0; i < 200; i++) {
+ // malloc and leak, otherwise the free batching will cause us to filter
+ // out the allocations (as we don't see the interleaved frees).
+ volatile char* x = static_cast<char*>(malloc(kTestAllocSize));
+ if (x) {
+ x[0] = 'x';
+ }
+ usleep(10 * kMsToUs);
+ }
+ exit(0);
+ }
+
+ ASSERT_NE(pid, -1) << "Failed to fork.";
+ start_pipe.wr.reset();
+
+ // Construct tracing config (without starting profiling).
+ auto helper = GetHelper(&task_runner);
+ TraceConfig trace_config;
+ trace_config.add_buffers()->set_size_kb(10 * 1024);
+ trace_config.set_duration_ms(5000);
+ trace_config.set_flush_timeout_ms(10000);
+
+ auto* ds_config = trace_config.add_data_sources()->mutable_config();
+ ds_config->set_name("android.heapprofd");
+
+ auto* heapprofd_config = ds_config->mutable_heapprofd_config();
+ heapprofd_config->set_sampling_interval_bytes(1);
+ *heapprofd_config->add_pid() = static_cast<uint64_t>(pid);
+
+ // Wait for child to have been scheduled at least once.
+ char buf[1] = {};
+ ASSERT_EQ(PERFETTO_EINTR(read(*start_pipe.rd, buf, sizeof(buf))), 0);
+ start_pipe.rd.reset();
+
+ // Trace until child exits.
+ helper->StartTracing(trace_config);
+
+ siginfo_t siginfo = {};
+ int wait_ret = PERFETTO_EINTR(
+ waitid(P_PID, static_cast<id_t>(pid), &siginfo, WEXITED));
+ ASSERT_FALSE(wait_ret) << "Failed to waitid.";
+
+ // Assert that the child exited successfully.
+ EXPECT_EQ(siginfo.si_code, CLD_EXITED) << "Child did not exit by itself.";
+ EXPECT_EQ(siginfo.si_status, 0) << "Child's exit status not successful.";
+
+ // Assert that we did profile the process.
+ helper->FlushAndWait(2000);
+ helper->DisableTracing();
+ helper->WaitForTracingDisabled(10000);
+ helper->ReadData();
+ helper->WaitForReadData();
+
+ const auto& packets = helper->trace();
+ ASSERT_GT(packets.size(), 0u);
+ size_t profile_packets = 0;
+ size_t samples = 0;
+ uint64_t total_allocated = 0;
+ for (const protos::TracePacket& packet : packets) {
+ if (packet.has_profile_packet() &&
+ packet.profile_packet().process_dumps().size() > 0) {
+ const auto& dumps = packet.profile_packet().process_dumps();
+ ASSERT_EQ(dumps.size(), 1);
+ const protos::ProfilePacket_ProcessHeapSamples& dump = dumps.Get(0);
+ EXPECT_EQ(dump.pid(), pid);
+ profile_packets++;
+ for (const auto& sample : dump.samples()) {
+ samples++;
+ total_allocated += sample.self_allocated();
+ }
+ }
+ }
+ EXPECT_EQ(profile_packets, 1);
+ EXPECT_GT(samples, 0);
+ EXPECT_GT(total_allocated, 0);
+ }
};
// TODO(b/118428762): unwinding is broken at least x86 emulators, blanket-skip
@@ -1024,6 +1112,25 @@
ConcurrentSession();
}
+TEST_F(HeapprofdEndToEnd, NativeProfilingActiveAtProcessExit_Central) {
+ if (IsX86())
+ return;
+
+ auto prop = DisableFork();
+ ASSERT_EQ(ReadProperty(kHeapprofdModeProperty, ""), "");
+ NativeProfilingActiveAtProcessExit();
+}
+
+TEST_F(HeapprofdEndToEnd, NativeProfilingActiveAtProcessExit_Fork) {
+ if (IsX86())
+ return;
+
+ // RAII handle that resets to central mode when out of scope.
+ auto prop = EnableFork();
+ ASSERT_EQ(ReadProperty(kHeapprofdModeProperty, ""), "fork");
+ NativeProfilingActiveAtProcessExit();
+}
+
} // namespace
} // namespace profiling
} // namespace perfetto
diff --git a/src/profiling/memory/malloc_hooks.cc b/src/profiling/memory/malloc_hooks.cc
index d8e6b64..4178610 100644
--- a/src/profiling/memory/malloc_hooks.cc
+++ b/src/profiling/memory/malloc_hooks.cc
@@ -32,14 +32,17 @@
#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
+#include "perfetto/base/no_destructor.h"
#include "perfetto/base/unix_socket.h"
#include "perfetto/base/utils.h"
#include "src/profiling/memory/client.h"
#include "src/profiling/memory/proc_utils.h"
#include "src/profiling/memory/scoped_spinlock.h"
+#include "src/profiling/memory/unhooked_allocator.h"
#include "src/profiling/memory/wire_protocol.h"
using perfetto::profiling::ScopedSpinlock;
+using perfetto::profiling::UnhookedAllocator;
// This is so we can make an so that we can swap out with the existing
// libc_malloc_hooks.so
@@ -114,14 +117,26 @@
// This shared_ptr itself is protected by g_client_lock. Note that shared_ptr
// handles are not thread-safe by themselves:
// https://en.cppreference.com/w/cpp/memory/shared_ptr/atomic
-std::shared_ptr<perfetto::profiling::Client> g_client;
+//
+// To avoid on-destruction re-entrancy issues, this shared_ptr needs to be
+// constructed with an allocator that uses the unhooked malloc & free functions.
+// See UnhookedAllocator.
+//
+// NoDestructor<> wrapper is used to avoid destructing the shared_ptr at program
+// exit. The rationale is:
+// * Avoiding the atexit destructor racing against other threads that are
+// possibly running within the hooks.
+// * Making sure that atexit handlers running after this global's destructor
+// can still safely enter the hooks.
+perfetto::base::NoDestructor<std::shared_ptr<perfetto::profiling::Client>>
+ g_client;
// Protects g_client, and serves as an external lock for sampling decisions (see
// perfetto::profiling::Sampler).
//
-// TODO(rsavitski): consider lifting Sampler into this global scope. Nesting
-// under client is not necessary (though it does highlight that their validity
-// is tied together).
+// We rely on this atomic's destuction being a nop, as it is possible for the
+// hooks to attempt to acquire the spinlock after its destructor should have run
+// (technically a use-after-destruct scenario).
std::atomic<bool> g_client_lock{false};
constexpr char kHeapprofdBinPath[] = "/system/bin/heapprofd";
@@ -134,11 +149,11 @@
// (concurrent calls get discarded).
void ShutdownLazy() {
ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Blocking);
- if (!g_client) // other invocation already initiated shutdown
+ if (!g_client.ref()) // other invocation already initiated shutdown
return;
// Clear primary shared pointer, such that later hook invocations become nops.
- g_client.reset();
+ g_client.ref().reset();
if (!android_mallopt(M_RESET_HOOKS, nullptr, 0))
PERFETTO_PLOG("Unpatching heapprofd hooks failed.");
@@ -180,7 +195,8 @@
return true;
}
-std::shared_ptr<perfetto::profiling::Client> CreateClientForCentralDaemon() {
+std::shared_ptr<perfetto::profiling::Client> CreateClientForCentralDaemon(
+ UnhookedAllocator<perfetto::profiling::Client> unhooked_allocator) {
PERFETTO_DLOG("Constructing client for central daemon.");
using perfetto::profiling::Client;
@@ -188,10 +204,12 @@
Client::ConnectToHeapprofd(perfetto::profiling::kHeapprofdSocketFile);
if (!sock)
return nullptr;
- return Client::CreateAndHandshake(std::move(sock.value()));
+ return Client::CreateAndHandshake(std::move(sock.value()),
+ unhooked_allocator);
}
-std::shared_ptr<perfetto::profiling::Client> CreateClientAndPrivateDaemon() {
+std::shared_ptr<perfetto::profiling::Client> CreateClientAndPrivateDaemon(
+ UnhookedAllocator<perfetto::profiling::Client> unhooked_allocator) {
PERFETTO_DLOG("Setting up fork mode profiling.");
perfetto::base::UnixSocketRaw parent_sock;
perfetto::base::UnixSocketRaw child_sock;
@@ -260,8 +278,8 @@
return nullptr;
}
- return perfetto::profiling::Client::CreateAndHandshake(
- std::move(parent_sock));
+ return perfetto::profiling::Client::CreateAndHandshake(std::move(parent_sock),
+ unhooked_allocator);
}
} // namespace
@@ -284,14 +302,20 @@
ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Blocking);
- if (g_client) {
+ if (g_client.ref()) {
PERFETTO_LOG("Rejecting concurrent profiling initialization.");
return true; // success as we're in a valid state
}
+ // The dispatch table never changes, so let the custom allocator retain the
+ // function pointers directly.
+ UnhookedAllocator<perfetto::profiling::Client> unhooked_allocator(
+ malloc_dispatch->malloc, malloc_dispatch->free);
+
std::shared_ptr<perfetto::profiling::Client> client =
- ShouldForkPrivateDaemon() ? CreateClientAndPrivateDaemon()
- : CreateClientForCentralDaemon();
+ ShouldForkPrivateDaemon()
+ ? CreateClientAndPrivateDaemon(unhooked_allocator)
+ : CreateClientForCentralDaemon(unhooked_allocator);
if (!client) {
PERFETTO_LOG("Client not initialized, not installing hooks.");
@@ -299,7 +323,7 @@
}
PERFETTO_DLOG("Client initialized.");
- g_client = std::move(client);
+ g_client.ref() = std::move(client);
return true;
}
@@ -324,15 +348,15 @@
std::shared_ptr<perfetto::profiling::Client> client;
{
ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Blocking);
- if (!g_client) // no active client (most likely shutting down)
+ if (!g_client.ref()) // no active client (most likely shutting down)
return;
- sampled_alloc_sz = g_client->GetSampleSizeLocked(size);
+ sampled_alloc_sz = g_client.ref()->GetSampleSizeLocked(size);
if (sampled_alloc_sz == 0) // not sampling
return;
- client = g_client; // owning copy
- } // unlock
+ client = g_client.ref(); // owning copy
+ } // unlock
if (!client->RecordMalloc(size, sampled_alloc_sz,
reinterpret_cast<uint64_t>(addr))) {
@@ -388,7 +412,7 @@
std::shared_ptr<perfetto::profiling::Client> client;
{
ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Blocking);
- client = g_client; // owning copy (or empty)
+ client = g_client.ref(); // owning copy (or empty)
}
if (client) {
@@ -415,9 +439,9 @@
ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Blocking);
// If there is no active client, we still want to reach the backing realloc,
// so keep going.
- if (g_client) {
- client = g_client; // owning copy
- sampled_alloc_sz = g_client->GetSampleSizeLocked(size);
+ if (g_client.ref()) {
+ client = g_client.ref(); // owning copy
+ sampled_alloc_sz = g_client.ref()->GetSampleSizeLocked(size);
}
} // unlock
diff --git a/src/profiling/memory/unhooked_allocator.h b/src/profiling/memory/unhooked_allocator.h
new file mode 100644
index 0000000..45a6262
--- /dev/null
+++ b/src/profiling/memory/unhooked_allocator.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PROFILING_MEMORY_UNHOOKED_ALLOCATOR_H_
+#define SRC_PROFILING_MEMORY_UNHOOKED_ALLOCATOR_H_
+
+#include <stdlib.h>
+
+#include <atomic>
+#include <exception>
+
+namespace perfetto {
+namespace profiling {
+
+// An allocator that uses the given |malloc| & |free| function pointers to
+// allocate/deallocate memory. Used in the heapprofd_client library, which hooks
+// the malloc functions in a target process, and therefore needs to use this
+// allocator to not have its own heap usage enter the hooks.
+//
+// See https://en.cppreference.com/w/cpp/named_req/Allocator.
+template <typename T>
+class UnhookedAllocator {
+ public:
+ template <typename U>
+ friend class UnhookedAllocator;
+
+ // to satisfy the Allocator interface
+ using value_type = T;
+
+ // for implementation convenience
+ using unhooked_malloc_t = void* (*)(size_t);
+ using unhooked_free_t = void (*)(void*);
+
+ UnhookedAllocator(unhooked_malloc_t unhooked_malloc,
+ unhooked_free_t unhooked_free)
+ : unhooked_malloc_(unhooked_malloc), unhooked_free_(unhooked_free) {}
+
+ template <typename U>
+ constexpr UnhookedAllocator(const UnhookedAllocator<U>& other) noexcept
+ : unhooked_malloc_(other.unhooked_malloc_),
+ unhooked_free_(other.unhooked_free_) {}
+
+ T* allocate(size_t n) {
+ if (n > size_t(-1) / sizeof(T))
+ std::terminate();
+ if (T* p = static_cast<T*>(unhooked_malloc_(n * sizeof(T))))
+ return p;
+ std::terminate();
+ }
+
+ void deallocate(T* p, size_t) noexcept { unhooked_free_(p); }
+
+ private:
+ unhooked_malloc_t unhooked_malloc_ = nullptr;
+ unhooked_free_t unhooked_free_ = nullptr;
+};
+
+template <typename T, typename U>
+bool operator==(const UnhookedAllocator<T>& first,
+ const UnhookedAllocator<U>& second) {
+ return first.unhooked_malloc_ == second.unhooked_malloc_ &&
+ first.unhooked_free_ == second.unhooked_free_;
+}
+
+template <typename T, typename U>
+bool operator!=(const UnhookedAllocator<T>& first,
+ const UnhookedAllocator<U>& second) {
+ return !(first == second);
+}
+
+} // namespace profiling
+} // namespace perfetto
+
+#endif // SRC_PROFILING_MEMORY_UNHOOKED_ALLOCATOR_H_