profiling: Add sampler.

Change-Id: I23e58648423c696648527d277ca6a7ef40a92dca
diff --git a/Android.bp b/Android.bp
index e7f0d6c..b6dadc0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -3850,6 +3850,8 @@
     "src/profiling/memory/heapprofd_integrationtest.cc",
     "src/profiling/memory/record_reader.cc",
     "src/profiling/memory/record_reader_unittest.cc",
+    "src/profiling/memory/sampler.cc",
+    "src/profiling/memory/sampler_unittest.cc",
     "src/profiling/memory/socket_listener.cc",
     "src/profiling/memory/socket_listener_unittest.cc",
     "src/profiling/memory/string_interner.cc",
diff --git a/src/profiling/memory/BUILD.gn b/src/profiling/memory/BUILD.gn
index 78e57c9..663a059 100644
--- a/src/profiling/memory/BUILD.gn
+++ b/src/profiling/memory/BUILD.gn
@@ -62,6 +62,8 @@
   sources = [
     "client.cc",
     "client.h",
+    "sampler.cc",
+    "sampler.h",
   ]
 }
 
@@ -83,6 +85,7 @@
     "client_unittest.cc",
     "heapprofd_integrationtest.cc",
     "record_reader_unittest.cc",
+    "sampler_unittest.cc",
     "socket_listener_unittest.cc",
     "string_interner_unittest.cc",
     "unwinding_unittest.cc",
diff --git a/src/profiling/memory/sampler.cc b/src/profiling/memory/sampler.cc
new file mode 100644
index 0000000..eba955e
--- /dev/null
+++ b/src/profiling/memory/sampler.cc
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/profiling/memory/sampler.h"
+
+#include "perfetto/base/utils.h"
+
+namespace perfetto {
+namespace {
+ThreadLocalSamplingData* GetSpecific(pthread_key_t key,
+                                     void* (*unhooked_malloc)(size_t),
+                                     void (*unhooked_free)(void*)) {
+  // This should not be used with glibc as it might re-enter into malloc, see
+  // http://crbug.com/776475.
+  void* specific = pthread_getspecific(key);
+  if (specific == nullptr) {
+    specific = unhooked_malloc(sizeof(ThreadLocalSamplingData));
+    new (specific) ThreadLocalSamplingData(unhooked_free);
+    pthread_setspecific(key, specific);
+  }
+  return reinterpret_cast<ThreadLocalSamplingData*>(specific);
+}
+}  // namespace
+
+// The algorithm below is a inspired by the Chromium sampling algorithm at
+// https://cs.chromium.org/search/?q=f:cc+symbol:AllocatorShimLogAlloc+package:%5Echromium$&type=cs
+
+int64_t ThreadLocalSamplingData::NextSampleInterval(double rate) {
+  std::exponential_distribution<double> dist(1 / rate);
+  int64_t next = static_cast<int64_t>(dist(random_engine_));
+  return next < 1 ? 1 : next;
+}
+
+size_t ThreadLocalSamplingData::ShouldSample(size_t sz, double rate) {
+  interval_to_next_sample_ -= sz;
+  size_t sz_multiplier = 0;
+  while (PERFETTO_UNLIKELY(interval_to_next_sample_ <= 0)) {
+    interval_to_next_sample_ += NextSampleInterval(rate);
+    ++sz_multiplier;
+  }
+  return sz_multiplier;
+}
+
+size_t ShouldSample(pthread_key_t key,
+                    size_t sz,
+                    double rate,
+                    void* (*unhooked_malloc)(size_t),
+                    void (*unhooked_free)(void*)) {
+  if (PERFETTO_UNLIKELY(sz >= rate))
+    return 1;
+  return GetSpecific(key, unhooked_malloc, unhooked_free)
+      ->ShouldSample(sz, rate);
+}
+
+void ThreadLocalSamplingData::KeyDestructor(void* ptr) {
+  ThreadLocalSamplingData* thread_local_data =
+      reinterpret_cast<ThreadLocalSamplingData*>(ptr);
+  void (*unhooked_free)(void*) = thread_local_data->unhooked_free_;
+  thread_local_data->~ThreadLocalSamplingData();
+  unhooked_free(ptr);
+}
+
+}  // namespace perfetto
diff --git a/src/profiling/memory/sampler.h b/src/profiling/memory/sampler.h
new file mode 100644
index 0000000..879e2a2
--- /dev/null
+++ b/src/profiling/memory/sampler.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PROFILING_MEMORY_SAMPLER_H_
+#define SRC_PROFILING_MEMORY_SAMPLER_H_
+
+#include <pthread.h>
+#include <stdint.h>
+
+#include <random>
+
+namespace perfetto {
+
+// This is the thread-local state needed to apply poission sampling to malloc
+// samples.
+//
+// We apply poisson sampling individually to each byte. The whole
+// allocation gets accounted as often as the number of sampled bytes it
+// contains.
+//
+// Googlers see go/chrome-shp for more details about the sampling (from
+// Chrome's heap profiler).
+class ThreadLocalSamplingData {
+ public:
+  ThreadLocalSamplingData(void (*unhooked_free)(void*))
+      : unhooked_free_(unhooked_free) {}
+  // Returns number of times a sample should be accounted. Due to how the
+  // poission sampling works, some samples should be accounted multiple times.
+  size_t ShouldSample(size_t sz, double rate);
+
+  // Destroy a TheadLocalSamplingData object after the pthread key has been
+  // deleted or when the thread shuts down. This uses unhooked_free passed in
+  // the constructor.
+  static void KeyDestructor(void* ptr);
+
+ private:
+  int64_t NextSampleInterval(double rate);
+  void (*unhooked_free_)(void*);
+  int64_t interval_to_next_sample_ = 0;
+  std::default_random_engine random_engine_;
+};
+
+// Returns number of times a sample should be accounted. Due to how the
+// poission sampling works, some samples should be accounted multiple times.
+//
+// Delegate to this thread's ThreadLocalSamplingData.
+//
+// We have to pass through the real malloc in order to allocate the TLS.
+size_t ShouldSample(pthread_key_t key,
+                    size_t sz,
+                    double rate,
+                    void* (*unhooked_malloc)(size_t),
+                    void (*unhooked_free)(void*));
+
+}  // namespace perfetto
+
+#endif  // SRC_PROFILING_MEMORY_SAMPLER_H_
diff --git a/src/profiling/memory/sampler_unittest.cc b/src/profiling/memory/sampler_unittest.cc
new file mode 100644
index 0000000..d598e71
--- /dev/null
+++ b/src/profiling/memory/sampler_unittest.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/profiling/memory/sampler.h"
+
+#include "gtest/gtest.h"
+
+#include <thread>
+
+namespace perfetto {
+namespace {
+
+TEST(SamplerTest, TestLarge) {
+  pthread_key_t key;
+  ASSERT_EQ(pthread_key_create(&key, ThreadLocalSamplingData::KeyDestructor),
+            0);
+  EXPECT_EQ(ShouldSample(key, 1024, 512, malloc, free), 1);
+  pthread_key_delete(key);
+}
+
+TEST(SamplerTest, TestSmall) {
+  pthread_key_t key;
+  ASSERT_EQ(pthread_key_create(&key, ThreadLocalSamplingData::KeyDestructor),
+            0);
+  // As we initialize interval_to_next_sample_ with 0, the first sample
+  // should always get sampled.
+  EXPECT_EQ(ShouldSample(key, 1, 512, malloc, free), 1);
+  pthread_key_delete(key);
+}
+
+TEST(SamplerTest, TestSmallFromThread) {
+  pthread_key_t key;
+  ASSERT_EQ(pthread_key_create(&key, ThreadLocalSamplingData::KeyDestructor),
+            0);
+  std::thread th([key] {
+    // As we initialize interval_to_next_sample_ with 0, the first sample
+    // should always get sampled.
+    EXPECT_EQ(ShouldSample(key, 1, 512, malloc, free), 1);
+  });
+  std::thread th2([key] {
+    // The threads should have separate state.
+    EXPECT_EQ(ShouldSample(key, 1, 512, malloc, free), 1);
+  });
+  th.join();
+  th2.join();
+  pthread_key_delete(key);
+}
+
+}  // namespace
+}  // namespace perfetto