protozero: add proto decoder class for efficient in-place decoding

This new class allows decoding of protobufs with minimal overhead, i.e.
with no allocations or unnecessary parsing. This allows for very fast
parsing of traces, which is required by the trace processor loading code.
It replaces proto_utils::ParseField, which is removed together with its
unit test.

Bug: 80416541
Change-Id: I8f9d5d9525f7f17ca7e0b513c5c52a52600bf1e2
diff --git a/Android.bp b/Android.bp
index 47c381f..3af992e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -61,6 +61,7 @@
     "src/ipc/virtual_destructors.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
+    "src/protozero/proto_decoder.cc",
     "src/protozero/proto_utils.cc",
     "src/protozero/scattered_stream_null_delegate.cc",
     "src/protozero/scattered_stream_writer.cc",
@@ -167,6 +168,7 @@
     "src/perfetto_cmd/rate_limiter.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
+    "src/protozero/proto_decoder.cc",
     "src/protozero/proto_utils.cc",
     "src/protozero/scattered_stream_null_delegate.cc",
     "src/protozero/scattered_stream_writer.cc",
@@ -313,6 +315,7 @@
     "src/ipc/virtual_destructors.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
+    "src/protozero/proto_decoder.cc",
     "src/protozero/proto_utils.cc",
     "src/protozero/scattered_stream_delegate_for_testing.cc",
     "src/protozero/scattered_stream_null_delegate.cc",
@@ -3481,6 +3484,7 @@
     "src/ipc/virtual_destructors.cc",
     "src/protozero/message.cc",
     "src/protozero/message_handle.cc",
+    "src/protozero/proto_decoder.cc",
     "src/protozero/proto_utils.cc",
     "src/protozero/scattered_stream_null_delegate.cc",
     "src/protozero/scattered_stream_writer.cc",
@@ -3699,6 +3703,8 @@
     "src/protozero/message_handle.cc",
     "src/protozero/message_handle_unittest.cc",
     "src/protozero/message_unittest.cc",
+    "src/protozero/proto_decoder.cc",
+    "src/protozero/proto_decoder_unittest.cc",
     "src/protozero/proto_utils.cc",
     "src/protozero/proto_utils_unittest.cc",
     "src/protozero/scattered_stream_delegate_for_testing.cc",
diff --git a/include/perfetto/protozero/BUILD.gn b/include/perfetto/protozero/BUILD.gn
index b97b137..cc75d2d 100644
--- a/include/perfetto/protozero/BUILD.gn
+++ b/include/perfetto/protozero/BUILD.gn
@@ -20,7 +20,9 @@
     "contiguous_memory_range.h",
     "message.h",
     "message_handle.h",
+    "proto_decoder.h",
     "proto_field_descriptor.h",
+    "proto_utils.h",
     "scattered_stream_null_delegate.h",
     "scattered_stream_writer.h",
   ]
diff --git a/include/perfetto/protozero/proto_decoder.h b/include/perfetto/protozero/proto_decoder.h
new file mode 100644
index 0000000..24d36f6
--- /dev/null
+++ b/include/perfetto/protozero/proto_decoder.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
+#define INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
+
+#include <stdint.h>
+#include <memory>
+
+#include "perfetto/protozero/proto_utils.h"
+
+namespace protozero {
+
+// Reads and decodes protobuf messages from a fixed length buffer. This class
+// does not allocate and does no more work than necessary so can be used in
+// performance sensitive contexts.
+class ProtoDecoder {
+ public:
+  // A decoded protobuf field. |id| == 0 marks an invalid result, e.g. because
+  // the tag or the value could not be fully read from the buffer.
+  struct Field {
+    struct LengthDelimited {
+      const uint8_t* data;  // Points into the decoder's buffer; not owned.
+      uint64_t length;      // Payload size in bytes.
+    };
+
+    uint32_t id = 0;
+    protozero::proto_utils::FieldType type;
+    union {
+      uint64_t int_value;            // VarInt, Fixed32 and Fixed64 fields.
+      LengthDelimited length_value;  // Length-delimited fields.
+    };
+  };
+
+  // Creates a ProtoDecoder reading from |buffer|, which is |length| bytes long.
+  ProtoDecoder(const uint8_t* buffer, uint64_t length);
+
+  // Reads the next field from the buffer. If the full field cannot be read,
+  // the returned struct will have id 0 which is an invalid field id.
+  Field ReadField();
+
+  // Returns true once all |length_| bytes have been consumed, i.e. when
+  // |current_position_| - |buffer_| == |length_|, and false otherwise.
+  bool IsEndOfBuffer();
+
+ private:
+  const uint8_t* const buffer_;                // Start of input; not owned.
+  const uint64_t length_;                      // Size of input in bytes.
+  const uint8_t* current_position_ = nullptr;  // Next byte to decode.
+};
+
+}  // namespace protozero
+
+#endif  // INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
diff --git a/include/perfetto/protozero/proto_utils.h b/include/perfetto/protozero/proto_utils.h
index 5a8fb01..a0f9d9a 100644
--- a/include/perfetto/protozero/proto_utils.h
+++ b/include/perfetto/protozero/proto_utils.h
@@ -112,19 +112,6 @@
                            const uint8_t* end,
                            uint64_t* value);
 
-// Parses a protobuf field and computes its id, type and value.
-// Returns a pointer to the next unconsumed byte (|start| < retval <= end) that
-// is either the beginning of the next field or the end of the parent message.
-// In the case of a kFieldTypeLengthDelimited field, |field_intvalue| will
-// store the length of the payload (either a string or a nested message). In
-// this case, the start of the payload will be at (return value) -
-// |field_intvalue|.
-const uint8_t* ParseField(const uint8_t* start,
-                          const uint8_t* end,
-                          uint32_t* field_id,
-                          FieldType* field_type,
-                          uint64_t* field_intvalue);
-
 }  // namespace proto_utils
 }  // namespace protozero
 
diff --git a/src/protozero/BUILD.gn b/src/protozero/BUILD.gn
index 1f10d00..2d45f00 100644
--- a/src/protozero/BUILD.gn
+++ b/src/protozero/BUILD.gn
@@ -29,6 +29,7 @@
   sources = [
     "message.cc",
     "message_handle.cc",
+    "proto_decoder.cc",
     "proto_utils.cc",
     "scattered_stream_null_delegate.cc",
     "scattered_stream_writer.cc",
@@ -53,6 +54,7 @@
   testonly = true
   deps = [
     ":protozero",
+    ":test_support",
     ":testing_messages_lite",
     ":testing_messages_zero",
     "../../gn:default_deps",
@@ -62,6 +64,7 @@
   sources = [
     "message_handle_unittest.cc",
     "message_unittest.cc",
+    "proto_decoder_unittest.cc",
     "proto_utils_unittest.cc",
     "scattered_stream_writer_unittest.cc",
     "test/fake_scattered_buffer.cc",
diff --git a/src/protozero/proto_decoder.cc b/src/protozero/proto_decoder.cc
new file mode 100644
index 0000000..64eda6f
--- /dev/null
+++ b/src/protozero/proto_decoder.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/protozero/proto_decoder.h"
+
+#include <string.h>
+
+#include "perfetto/base/logging.h"
+
+namespace protozero {
+
+using namespace proto_utils;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define BYTE_SWAP_TO_LE32(x) (x)
+#define BYTE_SWAP_TO_LE64(x) (x)
+#else
+#error Unimplemented for big endian archs.
+#endif
+
+ProtoDecoder::ProtoDecoder(const uint8_t* buffer, uint64_t length)
+    : buffer_(buffer), length_(length), current_position_(buffer) {}
+
+// Decodes the field at |current_position_|. On success the position advances
+// past the field; on failure a Field with id == 0 is returned and the position
+// is left untouched.
+ProtoDecoder::Field ProtoDecoder::ReadField() {
+  Field field{};
+
+  // A field starts with a VarInt-encoded tag. The 3 least significant bits of
+  // the tag hold the field type and the remaining bits hold the field id; as
+  // with any VarInt, a byte with its MSB set means the tag continues in the
+  // next byte.
+  const uint8_t kFieldTypeNumBits = 3;
+  const uint64_t kFieldTypeMask = (1 << kFieldTypeNumBits) - 1;  // 0000 0111;
+
+  const uint8_t* end = buffer_ + length_;
+  const uint8_t* pos = current_position_;
+  PERFETTO_DCHECK(pos >= buffer_);
+  PERFETTO_DCHECK(pos <= end);
+
+  uint64_t raw_field_id = 0;
+  pos = ParseVarInt(pos, end, &raw_field_id);
+
+  uint32_t field_id = static_cast<uint32_t>(raw_field_id >> kFieldTypeNumBits);
+  if (field_id == 0 || pos >= end) {
+    return field;
+  }
+  field.type = static_cast<FieldType>(raw_field_id & kFieldTypeMask);
+
+  uint64_t field_intvalue = 0;
+  switch (field.type) {
+    case kFieldTypeFixed64: {
+      if (static_cast<size_t>(end - pos) < sizeof(uint64_t)) {
+        return field;
+      }
+      memcpy(&field_intvalue, pos, sizeof(uint64_t));
+      field.int_value = BYTE_SWAP_TO_LE64(field_intvalue);
+      pos += sizeof(uint64_t);
+      break;
+    }
+    case kFieldTypeFixed32: {
+      if (static_cast<size_t>(end - pos) < sizeof(uint32_t)) {
+        return field;
+      }
+      uint32_t tmp;
+      memcpy(&tmp, pos, sizeof(uint32_t));
+      field.int_value = BYTE_SWAP_TO_LE32(tmp);
+      pos += sizeof(uint32_t);
+      break;
+    }
+    case kFieldTypeVarInt: {
+      // We need to explicitly check for zero so that a zero coming back from
+      // ParseVarInt can only mean it ran out of space in the buffer.
+      if (*pos == 0) {
+        pos++;
+        field.int_value = 0;
+      } else {
+        pos = ParseVarInt(pos, end, &field.int_value);
+        // The parsed value equalling zero means ParseVarInt could not fully
+        // parse the number. This is because we are out of space in the buffer.
+        // Return with the id still zero and without updating the offset so a
+        // future read can read this field.
+        if (field.int_value == 0) {
+          return field;
+        }
+      }
+      break;
+    }
+    case kFieldTypeLengthDelimited: {
+      // We need to explicitly check for zero so that a zero coming back from
+      // ParseVarInt can only mean it ran out of space in the buffer.
+      if (*pos == 0) {
+        field.length_value.data = ++pos;
+        field.length_value.length = 0;
+      } else {
+        pos = ParseVarInt(pos, end, &field_intvalue);
+        // A zero length means ParseVarInt ran out of buffer; a length above
+        // the bytes left means the payload is truncated. Compare against the
+        // remaining size instead of forming |pos + length|: the length is
+        // untrusted and that sum can overflow the pointer and pass the check.
+        if (field_intvalue == 0 || pos > end ||
+            field_intvalue > static_cast<uint64_t>(end - pos)) {
+          return field;
+        }
+        field.length_value.data = pos;
+        field.length_value.length = field_intvalue;
+        pos += field_intvalue;
+      }
+      break;
+    }
+  }
+  // Mark the field as valid and commit the new read position.
+  field.id = field_id;
+  current_position_ = pos;
+  return field;
+}
+
+bool ProtoDecoder::IsEndOfBuffer() {
+  PERFETTO_DCHECK(current_position_ >= buffer_);
+  return length_ == static_cast<uint64_t>(current_position_ - buffer_);
+}
+
+}  // namespace protozero
diff --git a/src/protozero/proto_decoder_unittest.cc b/src/protozero/proto_decoder_unittest.cc
new file mode 100644
index 0000000..fff9dba
--- /dev/null
+++ b/src/protozero/proto_decoder_unittest.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/protozero/proto_decoder.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "perfetto/base/utils.h"
+#include "perfetto/protozero/message.h"
+#include "perfetto/protozero/proto_utils.h"
+#include "src/protozero/scattered_stream_delegate_for_testing.h"
+
+namespace protozero {
+namespace {
+
+using ::testing::_;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using namespace protozero::proto_utils;
+
+TEST(ProtoDecoder, ReadString) {
+  Message message;
+  perfetto::ScatteredStreamDelegateForTesting delegate(512);
+  ScatteredStreamWriter writer(&delegate);
+  delegate.set_writer(&writer);
+  message.Reset(&writer);
+
+  static constexpr char kTestString[] = "test";
+  message.AppendString(1, kTestString);
+
+  uint8_t* data = delegate.chunks()[0].get();
+  uint64_t bytes_used = 512 - writer.bytes_available();
+
+  ProtoDecoder decoder(data, bytes_used);
+  ProtoDecoder::Field field = decoder.ReadField();
+
+  ASSERT_EQ(field.id, 1);
+  ASSERT_EQ(field.type, proto_utils::FieldType::kFieldTypeLengthDelimited);
+  ASSERT_EQ(field.length_value.length, sizeof(kTestString) - 1);
+  for (size_t i = 0; i < sizeof(kTestString) - 1; i++) {
+    ASSERT_EQ(field.length_value.data[i], kTestString[i]);
+  }
+}
+
+TEST(ProtoDecoder, FixedData) {
+  struct FieldExpectation {
+    const char* encoded;
+    size_t encoded_size;
+    uint32_t id;
+    FieldType type;
+    uint64_t int_value;  // Payload length for length-delimited fields.
+  };
+
+  const FieldExpectation kFieldExpectations[] = {
+      {"\x08\x00", 2, 1, kFieldTypeVarInt, 0},
+      {"\x08\x42", 2, 1, kFieldTypeVarInt, 0x42},
+      {"\xF8\x07\x42", 3, 127, kFieldTypeVarInt, 0x42},
+      {"\x90\x4D\xFF\xFF\xFF\xFF\x0F", 7, 1234, kFieldTypeVarInt, 0xFFFFFFFF},
+      {"\x7D\x42\x00\x00\x00", 5, 15, kFieldTypeFixed32, 0x42},
+      {"\x95\x4D\x78\x56\x34\x12", 6, 1234, kFieldTypeFixed32, 0x12345678},
+      {"\x79\x42\x00\x00\x00\x00\x00\x00\x00", 9, 15, kFieldTypeFixed64, 0x42},
+      {"\x91\x4D\x08\x07\x06\x05\x04\x03\x02\x01", 10, 1234, kFieldTypeFixed64,
+       0x0102030405060708},
+      {"\x0A\x00", 2, 1, kFieldTypeLengthDelimited, 0},
+      {"\x0A\x04|abc", 6, 1, kFieldTypeLengthDelimited, 4},
+      {"\x92\x4D\x04|abc", 7, 1234, kFieldTypeLengthDelimited, 4},
+      {"\x92\x4D\x83\x01|abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzab"
+       "cdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstu"
+       "vwx",
+       135, 1234, kFieldTypeLengthDelimited, 131},
+  };
+
+  for (size_t i = 0; i < perfetto::base::ArraySize(kFieldExpectations); ++i) {
+    const FieldExpectation& exp = kFieldExpectations[i];
+    ProtoDecoder decoder(reinterpret_cast<const uint8_t*>(exp.encoded),
+                         exp.encoded_size);
+    // A single read must decode the field and consume the entire buffer.
+    ProtoDecoder::Field field = decoder.ReadField();
+    ASSERT_EQ(exp.id, field.id);
+    ASSERT_EQ(exp.type, field.type);
+    ASSERT_TRUE(decoder.IsEndOfBuffer());
+    if (field.type == kFieldTypeLengthDelimited) {
+      ASSERT_EQ(exp.int_value, field.length_value.length);
+    } else {
+      ASSERT_EQ(exp.int_value, field.int_value);
+    }
+  }
+}
+
+}  // namespace
+}  // namespace protozero
diff --git a/src/protozero/proto_utils.cc b/src/protozero/proto_utils.cc
index baf1120..6f1fba3 100644
--- a/src/protozero/proto_utils.cc
+++ b/src/protozero/proto_utils.cc
@@ -23,20 +23,9 @@
 #include "perfetto/base/logging.h"
 #include "perfetto/base/utils.h"
 
-#define PERFETTO_CHECK_PTR_LE(a, b)                \
-  PERFETTO_CHECK(reinterpret_cast<uintptr_t>(a) <= \
-                 reinterpret_cast<uintptr_t>(b))
-
 namespace protozero {
 namespace proto_utils {
 
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define BYTE_SWAP_TO_LE32(x) (x)
-#define BYTE_SWAP_TO_LE64(x) (x)
-#else
-#error Unimplemented for big endian archs.
-#endif
-
 const uint8_t* ParseVarInt(const uint8_t* start,
                            const uint8_t* end,
                            uint64_t* value) {
@@ -55,58 +44,5 @@
   return pos;
 }
 
-const uint8_t* ParseField(const uint8_t* start,
-                          const uint8_t* end,
-                          uint32_t* field_id,
-                          FieldType* field_type,
-                          uint64_t* field_intvalue) {
-  // The first byte of a proto field is structured as follows:
-  // The least 3 significant bits determine the field type.
-  // The most 5 significant bits determine the field id. If MSB == 1, the
-  // field id continues on the next bytes following the VarInt encoding.
-  const uint8_t kFieldTypeNumBits = 3;
-  const uint8_t kFieldTypeMask = (1 << kFieldTypeNumBits) - 1;  // 0000 0111;
-
-  const uint8_t* pos = start;
-  PERFETTO_CHECK_PTR_LE(pos, end - 1);
-  *field_type = static_cast<FieldType>(*pos & kFieldTypeMask);
-
-  uint64_t raw_field_id;
-  pos = ParseVarInt(pos, end, &raw_field_id);
-  raw_field_id >>= kFieldTypeNumBits;
-
-  PERFETTO_DCHECK(raw_field_id <= std::numeric_limits<uint32_t>::max());
-  *field_id = static_cast<uint32_t>(raw_field_id);
-
-  switch (*field_type) {
-    case kFieldTypeFixed64: {
-      PERFETTO_CHECK_PTR_LE(pos + sizeof(uint64_t), end);
-      memcpy(field_intvalue, pos, sizeof(uint64_t));
-      *field_intvalue = BYTE_SWAP_TO_LE64(*field_intvalue);
-      pos += sizeof(uint64_t);
-      break;
-    }
-    case kFieldTypeFixed32: {
-      PERFETTO_CHECK_PTR_LE(pos + sizeof(uint32_t), end);
-      uint32_t tmp;
-      memcpy(&tmp, pos, sizeof(uint32_t));
-      *field_intvalue = BYTE_SWAP_TO_LE32(tmp);
-      pos += sizeof(uint32_t);
-      break;
-    }
-    case kFieldTypeVarInt: {
-      pos = ParseVarInt(pos, end, field_intvalue);
-      break;
-    }
-    case kFieldTypeLengthDelimited: {
-      pos = ParseVarInt(pos, end, field_intvalue);
-      pos += *field_intvalue;
-      PERFETTO_CHECK_PTR_LE(pos, end);
-      break;
-    }
-  }
-  return pos;
-}
-
 }  // namespace proto_utils
 }  // namespace protozero
diff --git a/src/protozero/proto_utils_unittest.cc b/src/protozero/proto_utils_unittest.cc
index 01adcd3..40296dd 100644
--- a/src/protozero/proto_utils_unittest.cc
+++ b/src/protozero/proto_utils_unittest.cc
@@ -166,51 +166,6 @@
   }
 }
 
-TEST(ProtoUtilsTest, FieldDecoding) {
-  struct FieldExpectation {
-    const char* encoded;
-    size_t encoded_size;
-    uint32_t id;
-    FieldType type;
-    uint64_t int_value;
-  };
-
-  const FieldExpectation kFieldExpectations[] = {
-      {"\x08\x00", 2, 1, kFieldTypeVarInt, 0},
-      {"\x08\x42", 2, 1, kFieldTypeVarInt, 0x42},
-      {"\xF8\x07\x42", 3, 127, kFieldTypeVarInt, 0x42},
-      {"\x90\x4D\xFF\xFF\xFF\xFF\x0F", 7, 1234, kFieldTypeVarInt, 0xFFFFFFFF},
-      {"\x7D\x42\x00\x00\x00", 5, 15, kFieldTypeFixed32, 0x42},
-      {"\x95\x4D\x78\x56\x34\x12", 6, 1234, kFieldTypeFixed32, 0x12345678},
-      {"\x79\x42\x00\x00\x00\x00\x00\x00\x00", 9, 15, kFieldTypeFixed64, 0x42},
-      {"\x91\x4D\x08\x07\x06\x05\x04\x03\x02\x01", 10, 1234, kFieldTypeFixed64,
-       0x0102030405060708},
-      {"\x0A\x00", 2, 1, kFieldTypeLengthDelimited, 0},
-      {"\x0A\x04|abc", 6, 1, kFieldTypeLengthDelimited, 4},
-      {"\x92\x4D\x04|abc", 7, 1234, kFieldTypeLengthDelimited, 4},
-      {"\x92\x4D\x83\x01|abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzab"
-       "cdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstu"
-       "vwx",
-       135, 1234, kFieldTypeLengthDelimited, 131},
-  };
-
-  for (size_t i = 0; i < ArraySize(kFieldExpectations); ++i) {
-    const FieldExpectation& exp = kFieldExpectations[i];
-    FieldType field_type = kFieldTypeVarInt;
-    uint32_t field_id = std::numeric_limits<uint32_t>::max();
-    uint64_t field_intvalue = std::numeric_limits<uint64_t>::max();
-    const uint8_t* res = ParseField(
-        reinterpret_cast<const uint8_t*>(exp.encoded),
-        reinterpret_cast<const uint8_t*>(exp.encoded + exp.encoded_size),
-        &field_id, &field_type, &field_intvalue);
-    ASSERT_EQ(reinterpret_cast<const void*>(exp.encoded + exp.encoded_size),
-              reinterpret_cast<const void*>(res));
-    ASSERT_EQ(exp.id, field_id);
-    ASSERT_EQ(exp.type, field_type);
-    ASSERT_EQ(exp.int_value, field_intvalue);
-  }
-}
-
 }  // namespace
 }  // namespace proto_utils
 }  // namespace protozero