protozero: simplify packed field and allow heap growth

This CL makes the code that handles packed repeated
field encoding simpler, more ergonomic and easy to use.
It mainly  adds support for dynamic growing on the heap.

The driving rationale of this CL is the upcoming code
that will handle proto parsing and re-encoding without
libprotobuf, which needs to deal with packed fields.
In that context, the code cannot have an upper bound,
it just needs to pass through as many fields as necessary.

This CL catches the occasion to simplify the code that
handles packed fields. The previous architecture was
too clever, had too many extensions points which turned
out to be hard to really extend.
This CL is simplifying that code making the following
assumptions:
- We don't really care about making the stack size
  configurable. One size should fit everybody.
- Having a hard-limit for StackAllocated has always been
  brittle. There seems to be lot of clever code in ftrace
  that tries to size everything accordingly to this limtation.
- Less code and less templates overall, the code seems easier
  to reason about now.

Bug: 132880619
Test: perfetto_unittests
Change-Id: Ic3397b101e14482358f9755673ce196c0a3a1bb7
diff --git a/Android.bp b/Android.bp
index 5fe6780..e6487db 100644
--- a/Android.bp
+++ b/Android.bp
@@ -4396,6 +4396,7 @@
   srcs: [
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
+    "src/protozero/packed_repeated_fields.cc",
     "src/protozero/proto_decoder.cc",
     "src/protozero/scattered_heap_buffer.cc",
     "src/protozero/scattered_stream_null_delegate.cc",
diff --git a/BUILD b/BUILD
index cf821db..80ba731 100644
--- a/BUILD
+++ b/BUILD
@@ -530,6 +530,7 @@
     srcs = [
         "src/protozero/message.cc",
         "src/protozero/message_handle.cc",
+        "src/protozero/packed_repeated_fields.cc",
         "src/protozero/proto_decoder.cc",
         "src/protozero/scattered_heap_buffer.cc",
         "src/protozero/scattered_stream_null_delegate.cc",
diff --git a/include/perfetto/protozero/packed_repeated_fields.h b/include/perfetto/protozero/packed_repeated_fields.h
index 9d46196..dff45e7 100644
--- a/include/perfetto/protozero/packed_repeated_fields.h
+++ b/include/perfetto/protozero/packed_repeated_fields.h
@@ -20,6 +20,8 @@
 #include <stdint.h>
 
 #include <array>
+#include <memory>
+#include <type_traits>
 
 #include "perfetto/base/logging.h"
 #include "perfetto/protozero/proto_utils.h"
@@ -30,39 +32,30 @@
 // To encode such a field, the caller is first expected to accumulate all of the
 // values in one of the following types (depending on the wire type of the
 // individual elements), defined below:
-// * protozero::PackedVarIntBuffer
-// * protozero::PackedFixedSizeBuffer</*element_type=*/ uint32_t>
+// * protozero::PackedVarInt
+// * protozero::PackedFixedSizeInt</*element_type=*/ uint32_t>
 // Then that buffer is passed to the protozero-generated setters as an argument.
 // After calling the setter, the buffer can be destroyed.
 //
-// The buffer classes by themselves do not own the raw memory used to hold the
-// accumulated values, so they need to be instantiated via a subclass that
-// implements the storage. This file provides one such type, StackAllocated, for
-// buffering the values using the stack.
-//
 // An example of encoding a packed field:
 //   protozero::HeapBuffered<protozero::Message> msg;
-//   // stack buffer holding up to 128 varints
-//   protozero::StackAllocated<protozero::PackedVarIntBuffer, 128> buf;
+//   protozero::PackedVarInt buf;
 //   buf.Append(42);
 //   buf.Append(-1);
 //   msg->set_fieldname(buf);
 //   msg.SerializeAsString();
-class PackedVarIntBuffer {
+
+class PackedBufferBase {
  public:
-  // worst case encoded size per varint
-  using StorageElementType = std::array<uint8_t, 10>;
+  PackedBufferBase() { Reset(); }
 
-  template <typename T>
-  void Append(T value) {
-    PERFETTO_CHECK(++element_size_ <= element_capacity_);
-    write_ptr_ = proto_utils::WriteVarInt(value, write_ptr_);
-  }
+  // Copy or move is disabled due to pointers to stack addresses.
+  PackedBufferBase(const PackedBufferBase&) = delete;
+  PackedBufferBase(PackedBufferBase&&) = delete;
+  PackedBufferBase& operator=(const PackedBufferBase&) = delete;
+  PackedBufferBase& operator=(PackedBufferBase&&) = delete;
 
-  void Reset() {
-    write_ptr_ = storage_begin_;
-    element_size_ = 0;
-  }
+  void Reset();
 
   const uint8_t* data() const { return storage_begin_; }
 
@@ -71,69 +64,50 @@
   }
 
  protected:
-  PackedVarIntBuffer(StorageElementType* storage, size_t storage_capacity)
-      : storage_begin_(reinterpret_cast<uint8_t*>(storage)),
-        write_ptr_(reinterpret_cast<uint8_t*>(storage)),
-        element_capacity_(storage_capacity) {}
+  void GrowIfNeeded() {
+    PERFETTO_DCHECK(write_ptr_ >= storage_begin_ && write_ptr_ <= storage_end_);
+    if (PERFETTO_UNLIKELY(write_ptr_ + kMaxElementSize > storage_end_)) {
+      GrowSlowpath();
+    }
+  }
 
- private:
-  // Storage will consist of an array of StorageElementType, but we treat it as
-  // a contiguous uint8_t buffer. Note that the varints will not be aligned with
-  // StorageElementType boundaries.
-  uint8_t* const storage_begin_;
+  void GrowSlowpath();
+
+  // max(uint64_t varint encoding, biggest fixed type (uint64)).
+  static constexpr size_t kMaxElementSize = 10;
+
+  // So sizeof(this) == 8k.
+  static constexpr size_t kOnStackStorageSize = 8192 - 32;
+
+  uint8_t* storage_begin_;
+  uint8_t* storage_end_;
   uint8_t* write_ptr_;
-
-  size_t element_size_ = 0;
-  const size_t element_capacity_;
+  std::unique_ptr<uint8_t[]> heap_buf_;
+  alignas(uint64_t) uint8_t stack_buf_[kOnStackStorageSize];
 };
 
-template <typename ElementType>
-class PackedFixedSizeBuffer {
+class PackedVarInt : public PackedBufferBase {
  public:
-  // The template type parameter is the same as the storage type, so one
-  // of the type names is used in all places for consistency.
-  using StorageElementType = ElementType;
-
-  void Append(StorageElementType value) {
-    PERFETTO_CHECK(write_ptr_ < storage_end_);
-    *(write_ptr_++) = value;
+  template <typename T>
+  void Append(T value) {
+    GrowIfNeeded();
+    write_ptr_ = proto_utils::WriteVarInt(value, write_ptr_);
   }
-
-  void Reset() { write_ptr_ = storage_begin_; }
-
-  const uint8_t* data() const {
-    return reinterpret_cast<const uint8_t*>(storage_begin_);
-  }
-
-  size_t size() const {
-    return static_cast<size_t>(reinterpret_cast<uint8_t*>(write_ptr_) -
-                               reinterpret_cast<uint8_t*>(storage_begin_));
-  }
-
- protected:
-  PackedFixedSizeBuffer(StorageElementType* storage,
-                        size_t max_storage_elements)
-      : storage_begin_(storage),
-        storage_end_(storage + max_storage_elements),
-        write_ptr_(storage) {
-    static_assert(
-        sizeof(StorageElementType) == 4 || sizeof(StorageElementType) == 8,
-        "invalid type width");
-  }
-
- private:
-  StorageElementType* const storage_begin_;
-  StorageElementType* const storage_end_;
-  StorageElementType* write_ptr_;
 };
 
-template <typename PackedBuffer, size_t MaxNumElements>
-class StackAllocated : public PackedBuffer {
+template <typename T /* e.g. uint32_t for Fixed32 */>
+class PackedFixedSizeInt : public PackedBufferBase {
  public:
-  StackAllocated() : PackedBuffer(storage_, MaxNumElements) {}
-
- private:
-  typename PackedBuffer::StorageElementType storage_[MaxNumElements];
+  void Append(T value) {
+    static_assert(sizeof(T) == 4 || sizeof(T) == 8,
+                  "PackedFixedSizeInt should be used only with 32/64-bit ints");
+    static_assert(sizeof(T) <= kMaxElementSize,
+                  "kMaxElementSize needs to be updated");
+    GrowIfNeeded();
+    PERFETTO_DCHECK(reinterpret_cast<size_t>(write_ptr_) % alignof(T) == 0);
+    memcpy(reinterpret_cast<T*>(write_ptr_), &value, sizeof(T));
+    write_ptr_ += sizeof(T);
+  }
 };
 
 }  // namespace protozero
diff --git a/src/protozero/BUILD.gn b/src/protozero/BUILD.gn
index 398ad85..bcc34e0 100644
--- a/src/protozero/BUILD.gn
+++ b/src/protozero/BUILD.gn
@@ -30,6 +30,7 @@
   sources = [
     "message.cc",
     "message_handle.cc",
+    "packed_repeated_fields.cc",
     "proto_decoder.cc",
     "scattered_heap_buffer.cc",
     "scattered_stream_null_delegate.cc",
diff --git a/src/protozero/packed_repeated_fields.cc b/src/protozero/packed_repeated_fields.cc
new file mode 100644
index 0000000..b0e23c5
--- /dev/null
+++ b/src/protozero/packed_repeated_fields.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/protozero/packed_repeated_fields.h"
+
+#include "perfetto/ext/base/utils.h"
+
+namespace protozero {
+
+// static
+constexpr size_t PackedBufferBase::kOnStackStorageSize;
+
+void PackedBufferBase::GrowSlowpath() {
+  size_t write_off = static_cast<size_t>(write_ptr_ - storage_begin_);
+  size_t old_size = static_cast<size_t>(storage_end_ - storage_begin_);
+  size_t new_size = old_size < 65536 ? (old_size * 2) : (old_size * 3 / 2);
+  new_size = perfetto::base::AlignUp<4096>(new_size);
+  std::unique_ptr<uint8_t[]> new_buf(new uint8_t[new_size]);
+  memcpy(new_buf.get(), storage_begin_, old_size);
+  heap_buf_ = std::move(new_buf);
+  storage_begin_ = heap_buf_.get();
+  storage_end_ = storage_begin_ + new_size;
+  write_ptr_ = storage_begin_ + write_off;
+}
+
+void PackedBufferBase::Reset() {
+  heap_buf_.reset();
+  storage_begin_ = reinterpret_cast<uint8_t*>(&stack_buf_[0]);
+  storage_end_ = reinterpret_cast<uint8_t*>(&stack_buf_[kOnStackStorageSize]);
+  write_ptr_ = storage_begin_;
+}
+
+}  // namespace protozero
diff --git a/src/protozero/proto_decoder_unittest.cc b/src/protozero/proto_decoder_unittest.cc
index 75f3b7e..1a63cef 100644
--- a/src/protozero/proto_decoder_unittest.cc
+++ b/src/protozero/proto_decoder_unittest.cc
@@ -442,7 +442,7 @@
 
 TEST(ProtoDecoderTest, ZeroLengthPackedRepeatedField) {
   HeapBuffered<pbtest::PackedRepeatedFields> msg;
-  StackAllocated<PackedVarIntBuffer, 8> buf;
+  PackedVarInt buf;
   msg->set_field_int32(buf);
   std::string serialized = msg.SerializeAsString();
 
@@ -461,7 +461,7 @@
 TEST(ProtoDecoderTest, MalformedPackedFixedBuffer) {
   // Encode a fixed32 field where the length is not a multiple of 4 bytes.
   HeapBuffered<pbtest::PackedRepeatedFields> msg;
-  StackAllocated<PackedFixedSizeBuffer<uint32_t>, 8> buf;
+  PackedFixedSizeInt<uint32_t> buf;
   buf.Append(1);
   buf.Append(2);
   buf.Append(3);
@@ -486,7 +486,7 @@
 TEST(ProtoDecoderTest, MalformedPackedVarIntBuffer) {
   // Encode a varint field with the last varint chopped off partway.
   HeapBuffered<pbtest::PackedRepeatedFields> msg;
-  StackAllocated<PackedVarIntBuffer, 8> buf;
+  PackedVarInt buf;
   buf.Append(1024);
   buf.Append(2048);
   buf.Append(4096);
diff --git a/src/protozero/protoc_plugin/protozero_plugin.cc b/src/protozero/protoc_plugin/protozero_plugin.cc
index 3c9e882..72d380c 100644
--- a/src/protozero/protoc_plugin/protozero_plugin.cc
+++ b/src/protozero/protoc_plugin/protozero_plugin.cc
@@ -212,21 +212,21 @@
       case FieldDescriptor::TYPE_ENUM:
       case FieldDescriptor::TYPE_SINT32:
       case FieldDescriptor::TYPE_SINT64:
-        return "::protozero::PackedVarIntBuffer";
+        return "::protozero::PackedVarInt";
 
       case FieldDescriptor::TYPE_FIXED32:
-        return "::protozero::PackedFixedSizeBuffer<uint32_t>";
+        return "::protozero::PackedFixedSizeInt<uint32_t>";
       case FieldDescriptor::TYPE_SFIXED32:
-        return "::protozero::PackedFixedSizeBuffer<int32_t>";
+        return "::protozero::PackedFixedSizeInt<int32_t>";
       case FieldDescriptor::TYPE_FLOAT:
-        return "::protozero::PackedFixedSizeBuffer<float>";
+        return "::protozero::PackedFixedSizeInt<float>";
 
       case FieldDescriptor::TYPE_FIXED64:
-        return "::protozero::PackedFixedSizeBuffer<uint64_t>";
+        return "::protozero::PackedFixedSizeInt<uint64_t>";
       case FieldDescriptor::TYPE_SFIXED64:
-        return "::protozero::PackedFixedSizeBuffer<int64_t>";
+        return "::protozero::PackedFixedSizeInt<int64_t>";
       case FieldDescriptor::TYPE_DOUBLE:
-        return "::protozero::PackedFixedSizeBuffer<double>";
+        return "::protozero::PackedFixedSizeInt<double>";
 
       case FieldDescriptor::TYPE_STRING:
       case FieldDescriptor::TYPE_MESSAGE:
diff --git a/src/protozero/test/protozero_conformance_unittest.cc b/src/protozero/test/protozero_conformance_unittest.cc
index 9d8c254..a374c51 100644
--- a/src/protozero/test/protozero_conformance_unittest.cc
+++ b/src/protozero/test/protozero_conformance_unittest.cc
@@ -135,7 +135,7 @@
   int values[] = {42, 255, -1};
 
   HeapBuffered<pbtest::PackedRepeatedFields> msg{kChunkSize, kChunkSize};
-  StackAllocated<PackedVarIntBuffer, 8> buf;
+  PackedVarInt buf;
   for (auto v : values)
     buf.Append(v);
   msg->set_field_int32(buf);
@@ -170,7 +170,7 @@
   uint32_t values[] = {1, 2, 4, 8};
 
   HeapBuffered<pbtest::PackedRepeatedFields> msg{kChunkSize, kChunkSize};
-  StackAllocated<PackedFixedSizeBuffer<uint32_t>, 8> buf;
+  PackedFixedSizeInt<uint32_t> buf;
   for (auto v : values)
     buf.Append(v);
   msg->set_field_fixed32(buf);
@@ -199,7 +199,7 @@
   int64_t values[] = {1, -2, 4, -8};
 
   HeapBuffered<pbtest::PackedRepeatedFields> msg{kChunkSize, kChunkSize};
-  StackAllocated<PackedFixedSizeBuffer<int64_t>, 8> buf;
+  PackedFixedSizeInt<int64_t> buf;
   for (auto v : values)
     buf.Append(v);
   msg->set_field_sfixed64(buf);
@@ -226,7 +226,7 @@
 
 TEST(ProtoZeroConformanceTest, EmptyPackedRepeatedField) {
   HeapBuffered<pbtest::PackedRepeatedFields> msg;
-  StackAllocated<PackedVarIntBuffer, 8> buf;
+  PackedVarInt buf;
   msg->set_field_int32(buf);
   std::string serialized = msg.SerializeAsString();
 
@@ -243,5 +243,25 @@
   EXPECT_EQ(0, parsed_gold_msg.field_int32_size());
 }
 
+// Tests that the stack -> heap expansion dosn't lose data.
+TEST(ProtoZeroConformanceTest, PackedRepeatedResize) {
+  const int kNumValues = 32768;
+  const int64_t kMultiplier = 10000000;
+  HeapBuffered<pbtest::PackedRepeatedFields> msg{kChunkSize, kChunkSize};
+  PackedFixedSizeInt<int64_t> buf;
+  for (int i = 0; i < kNumValues; i++)
+    buf.Append(i * kMultiplier);
+  msg->set_field_sfixed64(buf);
+  std::string serialized = msg.SerializeAsString();
+
+  // Correctly parsed by the protobuf library.
+  pbgold::PackedRepeatedFields parsed_gold_msg;
+  parsed_gold_msg.ParseFromString(serialized);
+  ASSERT_EQ(parsed_gold_msg.field_sfixed64().size(), kNumValues);
+  for (int i = 0; i < kNumValues; i++) {
+    ASSERT_EQ(parsed_gold_msg.field_sfixed64(i), i * kMultiplier);
+  }
+}
+
 }  // namespace
 }  // namespace protozero
diff --git a/src/traced/probes/ftrace/compact_sched.h b/src/traced/probes/ftrace/compact_sched.h
index bebe9ff..6dc4692 100644
--- a/src/traced/probes/ftrace/compact_sched.h
+++ b/src/traced/probes/ftrace/compact_sched.h
@@ -93,25 +93,13 @@
   static constexpr size_t kMinSupportedSchedSwitchSize = 56;
   static constexpr size_t kExpectedCommLength = 16;
 
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>*
-  switch_timestamp() {
-    return &switch_timestamp_;
-  }
+  protozero::PackedVarInt* switch_timestamp() { return &switch_timestamp_; }
 
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>*
-  switch_prev_state() {
-    return &switch_prev_state_;
-  }
+  protozero::PackedVarInt* switch_prev_state() { return &switch_prev_state_; }
 
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>*
-  switch_next_pid() {
-    return &switch_next_pid_;
-  }
+  protozero::PackedVarInt* switch_next_pid() { return &switch_next_pid_; }
 
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>*
-  switch_next_prio() {
-    return &switch_next_prio_;
-  }
+  protozero::PackedVarInt* switch_next_prio() { return &switch_next_prio_; }
 
   size_t interned_switch_comms_size() const {
     return interned_switch_comms_size_;
@@ -157,14 +145,10 @@
   // each relative to the preceding sched_switch timestamp.
   uint64_t last_switch_timestamp_ = 0;
 
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_timestamp_;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_prev_state_;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_next_pid_;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_next_prio_;
+  protozero::PackedVarInt switch_timestamp_;
+  protozero::PackedVarInt switch_prev_state_;
+  protozero::PackedVarInt switch_next_pid_;
+  protozero::PackedVarInt switch_next_prio_;
 
   // Storage for interned strings (without null bytes).
   char intern_buf_[kMaxElements * (kExpectedCommLength - 1)];
@@ -180,8 +164,7 @@
 
   // One entry per sched_switch event, contains the index of the interned
   // next_comm string view (i.e. array index into |interned_switch_comms|).
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_next_comm_index_;
+  protozero::PackedVarInt switch_next_comm_index_;
 };
 
 }  // namespace perfetto
diff --git a/tools/compact_reencode/main.cc b/tools/compact_reencode/main.cc
index 16a0435..d54db1c 100644
--- a/tools/compact_reencode/main.cc
+++ b/tools/compact_reencode/main.cc
@@ -72,17 +72,11 @@
   if (bundle.has_cpu())
     bundle_out->set_cpu(bundle.cpu());
 
-  static constexpr size_t kMaxElements = 2560;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_timestamp;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_prev_state;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_next_pid;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_next_prio;
-  protozero::StackAllocated<protozero::PackedVarIntBuffer, kMaxElements>
-      switch_next_comm_index;
+  protozero::PackedVarInt switch_timestamp;
+  protozero::PackedVarInt switch_prev_state;
+  protozero::PackedVarInt switch_next_pid;
+  protozero::PackedVarInt switch_next_prio;
+  protozero::PackedVarInt switch_next_comm_index;
 
   uint64_t last_switch_timestamp = 0;