Have separate sampler per heap.

This allows us to have a separate sampling rate per heap (e.g. ART
Allocation Tracking could be higher volume, so we want a higher
interval). It is also necessary if we want to allow clients to report
samples rather than allocations for their heap, because such a heap
needs its own (implicit) sampler, while all the other heaps share one.
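
Rough usage sketch of the resulting per-heap Sampler API (illustrative
only: the heap names, intervals and standalone main() below are not
part of this change, which still applies the single cli_config.interval
to every matched heap):

  #include <stdio.h>

  #include "src/profiling/memory/sampler.h"

  using perfetto::profiling::Sampler;

  int main() {
    // Each heap now owns its own sampler; the samplers only share the
    // global random engine, which in the client is guarded by
    // g_client_lock.
    Sampler art_sampler;
    Sampler malloc_sampler;
    art_sampler.SetSamplingInterval(65536);  // higher-volume heap.
    malloc_sampler.SetSamplingInterval(4096);

    // SampleSize() returns the bytes attributed to an allocation;
    // 0 means the allocation is not sampled and is not recorded.
    printf("%zu\n", malloc_sampler.SampleSize(1000));
    return 0;
  }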

Bug: 141241849
Bug: 160214819
Test: Profile system_server
Test: atest perfetto_integrationtests
Change-Id: Id949d668df561e7fa23efc6bd1fd92a92cc6c08e
diff --git a/Android.bp b/Android.bp
index 995769c..944f699 100644
--- a/Android.bp
+++ b/Android.bp
@@ -6311,6 +6311,7 @@
   name: "perfetto_src_profiling_memory_client",
   srcs: [
     "src/profiling/memory/client.cc",
+    "src/profiling/memory/sampler.cc",
   ],
 }
 
diff --git a/src/profiling/memory/BUILD.gn b/src/profiling/memory/BUILD.gn
index fb1c052..6186eb3 100644
--- a/src/profiling/memory/BUILD.gn
+++ b/src/profiling/memory/BUILD.gn
@@ -283,6 +283,7 @@
   sources = [
     "client.cc",
     "client.h",
+    "sampler.cc",
     "sampler.h",
   ]
 }
diff --git a/src/profiling/memory/client.cc b/src/profiling/memory/client.cc
index 1b29bb3..67485e9 100644
--- a/src/profiling/memory/client.cc
+++ b/src/profiling/memory/client.cc
@@ -252,23 +252,19 @@
 
   PERFETTO_DCHECK(client_config.interval >= 1);
   sock.SetBlocking(false);
-  Sampler sampler{client_config.interval};
   // note: the shared_ptr will retain a copy of the unhooked_allocator
   return std::allocate_shared<Client>(unhooked_allocator, std::move(sock),
                                       client_config, std::move(shmem.value()),
-                                      std::move(sampler), getpid(),
-                                      GetMainThreadStackRange());
+                                      getpid(), GetMainThreadStackRange());
 }
 
 Client::Client(base::UnixSocketRaw sock,
                ClientConfiguration client_config,
                SharedRingBuffer shmem,
-               Sampler sampler,
                pid_t pid_at_creation,
                StackRange main_thread_stack_range)
     : client_config_(client_config),
       max_shmem_tries_(GetMaxTries(client_config_)),
-      sampler_(std::move(sampler)),
       sock_(std::move(sock)),
       main_thread_stack_range_(main_thread_stack_range),
       shmem_(std::move(shmem)),
diff --git a/src/profiling/memory/client.h b/src/profiling/memory/client.h
index 4d18898..5527421 100644
--- a/src/profiling/memory/client.h
+++ b/src/profiling/memory/client.h
@@ -91,22 +91,11 @@
     shmem_.AddClientSpinlockBlockedUs(n);
   }
 
-  // Returns the number of bytes to assign to an allocation with the given
-  // |alloc_size|, based on the current sampling rate. A return value of zero
-  // means that the allocation should not be recorded. Not idempotent, each
-  // invocation mutates the sampler state.
-  //
-  // Not thread-safe.
-  size_t GetSampleSizeLocked(size_t alloc_size) {
-    return sampler_.SampleSize(alloc_size);
-  }
-
   // Public for std::allocate_shared. Use CreateAndHandshake() to create
   // instances instead.
   Client(base::UnixSocketRaw sock,
          ClientConfiguration client_config,
          SharedRingBuffer shmem,
-         Sampler sampler,
          pid_t pid_at_creation,
          StackRange main_thread_stack_range);
 
@@ -127,8 +116,6 @@
 
   ClientConfiguration client_config_;
   uint64_t max_shmem_tries_;
-  // sampler_ operations are not thread-safe.
-  Sampler sampler_;
   base::UnixSocketRaw sock_;
 
   StackRange main_thread_stack_range_{nullptr, nullptr};
diff --git a/src/profiling/memory/client_api.cc b/src/profiling/memory/client_api.cc
index b3b84e4..31f8023 100644
--- a/src/profiling/memory/client_api.cc
+++ b/src/profiling/memory/client_api.cc
@@ -52,6 +52,7 @@
   void (*callback)(bool enabled);
 
   // Internal fields.
+  perfetto::profiling::Sampler sampler;
   std::atomic<bool> ready;
   std::atomic<bool> enabled;
 };
@@ -252,7 +253,7 @@
 
 __attribute__((visibility("default"))) bool
 AHeapProfile_reportAllocation(uint32_t heap_id, uint64_t id, uint64_t size) {
-  const AHeapInfo& heap = GetHeap(heap_id);
+  AHeapInfo& heap = GetHeap(heap_id);
   if (!heap.enabled.load(std::memory_order_acquire)) {
     return false;
   }
@@ -271,8 +272,7 @@
       (*g_client_ptr)->AddClientSpinlockBlockedUs(s.blocked_us());
     }
 
-    sampled_alloc_sz =
-        (*g_client_ptr)->GetSampleSizeLocked(static_cast<size_t>(size));
+    sampled_alloc_sz = heap.sampler.SampleSize(static_cast<size_t>(size));
     if (sampled_alloc_sz == 0)  // not sampling
       return false;
 
@@ -363,12 +363,15 @@
   const perfetto::profiling::ClientConfiguration& cli_config =
       client->client_config();
 
-  for (uint32_t i = kMinHeapId; i < g_next_heap_id.load(); ++i) {
+  bool matched_heaps[perfetto::base::ArraySize(g_heaps)] = {};
+  uint32_t max_heap = g_next_heap_id.load();
+  for (uint32_t i = kMinHeapId; i < max_heap; ++i) {
     AHeapInfo& heap = GetHeap(i);
     if (!heap.ready.load(std::memory_order_acquire))
       continue;
 
-    bool matched = cli_config.all_heaps;
+    bool& matched = matched_heaps[i];
+    matched = cli_config.all_heaps;
     for (uint32_t j = 0; !matched && j < cli_config.num_heaps; ++j) {
       static_assert(sizeof(g_heaps[0].heap_name) == HEAPPROFD_HEAP_NAME_SZ,
                     "correct heap name size");
@@ -379,6 +382,8 @@
         matched = true;
       }
     }
+    // The callbacks must be called while NOT LOCKED. Because they run
+    // arbitrary code, it would be very easy to build a deadlock.
     if (matched) {
       if (!heap.enabled.load(std::memory_order_acquire) && heap.callback)
         heap.callback(true);
@@ -391,12 +396,22 @@
         heap.callback(false);
     }
   }
+
   PERFETTO_LOG("%s: heapprofd_client initialized.", getprogname());
   {
     ScopedSpinlock s(&g_client_lock, ScopedSpinlock::Mode::Try);
     if (PERFETTO_UNLIKELY(!s.locked()))
       AbortOnSpinlockTimeout();
 
+    // This needs to happen under the lock for mutual exclusion regarding the
+    // random engine.
+    for (uint32_t i = kMinHeapId; i < max_heap; ++i) {
+      AHeapInfo& heap = GetHeap(i);
+      if (matched_heaps[i]) {
+        heap.sampler.SetSamplingInterval(cli_config.interval);
+      }
+    }
+
     // This cannot have been set in the meantime. There are never two concurrent
     // calls to this function, as Bionic uses atomics to guard against that.
     PERFETTO_DCHECK(*GetClientLocked() == nullptr);
diff --git a/src/profiling/memory/sampler.cc b/src/profiling/memory/sampler.cc
new file mode 100644
index 0000000..9a05fc3
--- /dev/null
+++ b/src/profiling/memory/sampler.cc
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/profiling/memory/sampler.h"
+
+namespace perfetto {
+namespace profiling {
+
+std::default_random_engine& GetGlobalRandomEngineLocked() {
+  static std::default_random_engine engine;
+  return engine;
+}
+
+}  // namespace profiling
+}  // namespace perfetto
diff --git a/src/profiling/memory/sampler.h b/src/profiling/memory/sampler.h
index 8779201..d3d3fc0 100644
--- a/src/profiling/memory/sampler.h
+++ b/src/profiling/memory/sampler.h
@@ -29,6 +29,8 @@
 
 constexpr uint64_t kSamplerSeed = 1;
 
+std::default_random_engine& GetGlobalRandomEngineLocked();
+
 // Poisson sampler for memory allocations. We apply sampling individually to
 // each byte. The whole allocation gets accounted as often as the number of
 // sampled bytes it contains.
@@ -40,11 +42,11 @@
 // NB: not thread-safe, requires external synchronization.
 class Sampler {
  public:
-  explicit Sampler(uint64_t sampling_interval)
-      : sampling_interval_(sampling_interval),
-        sampling_rate_(1.0 / static_cast<double>(sampling_interval)),
-        random_engine_(kSamplerSeed),
-        interval_to_next_sample_(NextSampleInterval()) {}
+  void SetSamplingInterval(uint64_t sampling_interval) {
+    sampling_interval_ = sampling_interval;
+    sampling_rate_ = 1.0 / static_cast<double>(sampling_interval_);
+    interval_to_next_sample_ = NextSampleInterval();
+  }
 
   // Returns number of bytes that should be attributed to the sample.
   // If returned size is 0, the allocation should not be sampled.
@@ -60,7 +62,7 @@
  private:
   int64_t NextSampleInterval() {
     std::exponential_distribution<double> dist(sampling_rate_);
-    int64_t next = static_cast<int64_t>(dist(random_engine_));
+    int64_t next = static_cast<int64_t>(dist(GetGlobalRandomEngineLocked()));
     // The +1 corrects the distribution of the first value in the interval.
     // TODO(fmayer): Figure out why.
     return next + 1;
@@ -80,7 +82,6 @@
 
   uint64_t sampling_interval_;
   double sampling_rate_;
-  std::default_random_engine random_engine_;
   int64_t interval_to_next_sample_;
 };
 
diff --git a/src/profiling/memory/sampler_unittest.cc b/src/profiling/memory/sampler_unittest.cc
index db903ff..409fa4c 100644
--- a/src/profiling/memory/sampler_unittest.cc
+++ b/src/profiling/memory/sampler_unittest.cc
@@ -25,17 +25,23 @@
 namespace {
 
 TEST(SamplerTest, TestLarge) {
-  Sampler sampler(512);
+  GetGlobalRandomEngineLocked().seed(1);
+  Sampler sampler;
+  sampler.SetSamplingInterval(512);
   EXPECT_EQ(sampler.SampleSize(1024), 1024u);
 }
 
 TEST(SamplerTest, TestSmall) {
-  Sampler sampler(512);
+  GetGlobalRandomEngineLocked().seed(1);
+  Sampler sampler;
+  sampler.SetSamplingInterval(512);
   EXPECT_EQ(sampler.SampleSize(511), 512u);
 }
 
 TEST(SamplerTest, TestSequence) {
-  Sampler sampler(1);
+  GetGlobalRandomEngineLocked().seed(1);
+  Sampler sampler;
+  sampler.SetSamplingInterval(1);
   EXPECT_EQ(sampler.SampleSize(3), 3u);
   EXPECT_EQ(sampler.SampleSize(7), 7u);
   EXPECT_EQ(sampler.SampleSize(5), 5u);