protozero: add proto decoder class for efficient in-place decoding
This new class allows decoding of protobufs with minimal overhead - i.e.
no allocations or unnecessary parsing. This allows for very fast parsing
of traces which is required for the trace processor loading code.
Bug: 80416541
Change-Id: I8f9d5d9525f7f17ca7e0b513c5c52a52600bf1e2
diff --git a/Android.bp b/Android.bp
index 47c381f..3af992e 100644
--- a/Android.bp
+++ b/Android.bp
@@ -61,6 +61,7 @@
"src/ipc/virtual_destructors.cc",
"src/protozero/message.cc",
"src/protozero/message_handle.cc",
+ "src/protozero/proto_decoder.cc",
"src/protozero/proto_utils.cc",
"src/protozero/scattered_stream_null_delegate.cc",
"src/protozero/scattered_stream_writer.cc",
@@ -167,6 +168,7 @@
"src/perfetto_cmd/rate_limiter.cc",
"src/protozero/message.cc",
"src/protozero/message_handle.cc",
+ "src/protozero/proto_decoder.cc",
"src/protozero/proto_utils.cc",
"src/protozero/scattered_stream_null_delegate.cc",
"src/protozero/scattered_stream_writer.cc",
@@ -313,6 +315,7 @@
"src/ipc/virtual_destructors.cc",
"src/protozero/message.cc",
"src/protozero/message_handle.cc",
+ "src/protozero/proto_decoder.cc",
"src/protozero/proto_utils.cc",
"src/protozero/scattered_stream_delegate_for_testing.cc",
"src/protozero/scattered_stream_null_delegate.cc",
@@ -3481,6 +3484,7 @@
"src/ipc/virtual_destructors.cc",
"src/protozero/message.cc",
"src/protozero/message_handle.cc",
+ "src/protozero/proto_decoder.cc",
"src/protozero/proto_utils.cc",
"src/protozero/scattered_stream_null_delegate.cc",
"src/protozero/scattered_stream_writer.cc",
@@ -3699,6 +3703,8 @@
"src/protozero/message_handle.cc",
"src/protozero/message_handle_unittest.cc",
"src/protozero/message_unittest.cc",
+ "src/protozero/proto_decoder.cc",
+ "src/protozero/proto_decoder_unittest.cc",
"src/protozero/proto_utils.cc",
"src/protozero/proto_utils_unittest.cc",
"src/protozero/scattered_stream_delegate_for_testing.cc",
diff --git a/include/perfetto/protozero/BUILD.gn b/include/perfetto/protozero/BUILD.gn
index b97b137..cc75d2d 100644
--- a/include/perfetto/protozero/BUILD.gn
+++ b/include/perfetto/protozero/BUILD.gn
@@ -20,7 +20,9 @@
"contiguous_memory_range.h",
"message.h",
"message_handle.h",
+ "proto_decoder.h",
"proto_field_descriptor.h",
+ "proto_utils.h",
"scattered_stream_null_delegate.h",
"scattered_stream_writer.h",
]
diff --git a/include/perfetto/protozero/proto_decoder.h b/include/perfetto/protozero/proto_decoder.h
new file mode 100644
index 0000000..24d36f6
--- /dev/null
+++ b/include/perfetto/protozero/proto_decoder.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
+#define INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
+
+#include <stdint.h>
+#include <memory>
+
+#include "perfetto/protozero/proto_utils.h"
+
+namespace protozero {
+
+// Reads and decodes protobuf messages from a fixed length buffer. This class
+// does not allocate and does no more work than necessary so can be used in
+// performance sensitive contexts.
+class ProtoDecoder {
+ public:
+ // A decoded protobuf field. |id| == 0 marks an invalid field, e.g. because
+ // the tag or the value could not be fully read from the buffer.
+ struct Field {
+ struct LengthDelimited {
+ const uint8_t* data;  // Start of the payload inside the decoded buffer.
+ uint64_t length;  // Size of the payload in bytes.
+ };
+
+ uint32_t id = 0;
+ protozero::proto_utils::FieldType type;  // Wire type stored in the tag.
+ union {
+ uint64_t int_value;  // Set for varint, fixed32 and fixed64 fields.
+ LengthDelimited length_value;  // Set for length-delimited fields.
+ };
+ };
+
+ // Creates a ProtoDecoder using the given |buffer| with size |length| bytes.
+ ProtoDecoder(const uint8_t* buffer, uint64_t length);
+
+ // Reads the next field from the buffer. If the full field cannot be read,
+ // the returned struct will have id 0 which is an invalid field id.
+ Field ReadField();
+
+ // Returns true if the whole buffer has been consumed, i.e. if
+ // |current_position_| - |buffer_| == |length_|, and false otherwise.
+ bool IsEndOfBuffer();
+
+ private:
+ const uint8_t* const buffer_;
+ const uint64_t length_;
+ const uint8_t* current_position_ = nullptr;  // Next byte to decode.
+};
+
+} // namespace protozero
+
+#endif // INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
diff --git a/include/perfetto/protozero/proto_utils.h b/include/perfetto/protozero/proto_utils.h
index 5a8fb01..a0f9d9a 100644
--- a/include/perfetto/protozero/proto_utils.h
+++ b/include/perfetto/protozero/proto_utils.h
@@ -112,19 +112,6 @@
const uint8_t* end,
uint64_t* value);
-// Parses a protobuf field and computes its id, type and value.
-// Returns a pointer to the next unconsumed byte (|start| < retval <= end) that
-// is either the beginning of the next field or the end of the parent message.
-// In the case of a kFieldTypeLengthDelimited field, |field_intvalue| will
-// store the length of the payload (either a string or a nested message). In
-// this case, the start of the payload will be at (return value) -
-// |field_intvalue|.
-const uint8_t* ParseField(const uint8_t* start,
- const uint8_t* end,
- uint32_t* field_id,
- FieldType* field_type,
- uint64_t* field_intvalue);
-
} // namespace proto_utils
} // namespace protozero
diff --git a/src/protozero/BUILD.gn b/src/protozero/BUILD.gn
index 1f10d00..2d45f00 100644
--- a/src/protozero/BUILD.gn
+++ b/src/protozero/BUILD.gn
@@ -29,6 +29,7 @@
sources = [
"message.cc",
"message_handle.cc",
+ "proto_decoder.cc",
"proto_utils.cc",
"scattered_stream_null_delegate.cc",
"scattered_stream_writer.cc",
@@ -53,6 +54,7 @@
testonly = true
deps = [
":protozero",
+ ":test_support",
":testing_messages_lite",
":testing_messages_zero",
"../../gn:default_deps",
@@ -62,6 +64,7 @@
sources = [
"message_handle_unittest.cc",
"message_unittest.cc",
+ "proto_decoder_unittest.cc",
"proto_utils_unittest.cc",
"scattered_stream_writer_unittest.cc",
"test/fake_scattered_buffer.cc",
diff --git a/src/protozero/proto_decoder.cc b/src/protozero/proto_decoder.cc
new file mode 100644
index 0000000..64eda6f
--- /dev/null
+++ b/src/protozero/proto_decoder.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/protozero/proto_decoder.h"
+
+#include <string.h>
+
+#include "perfetto/base/logging.h"
+
+namespace protozero {
+
+using namespace proto_utils;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define BYTE_SWAP_TO_LE32(x) (x)
+#define BYTE_SWAP_TO_LE64(x) (x)
+#else
+#error Unimplemented for big endian archs.
+#endif
+
+// Stores |buffer| without copying it; decoding starts at its first byte.
+ProtoDecoder::ProtoDecoder(const uint8_t* buffer, uint64_t length)
+    : buffer_(buffer), length_(length), current_position_(buffer) {}
+
+// Decodes the field at the current position and moves past it. If the tag is
+// invalid or the field does not fit in the rest of the buffer, the returned
+// field has id 0 and the position is left untouched.
+ProtoDecoder::Field ProtoDecoder::ReadField() {
+  Field field{};
+
+  // The first byte of a proto field is structured as follows:
+  // The least 3 significant bits determine the field type.
+  // The most 5 significant bits determine the field id. If MSB == 1, the
+  // field id continues on the next bytes following the VarInt encoding.
+  const uint8_t kFieldTypeNumBits = 3;
+  const uint64_t kFieldTypeMask = (1 << kFieldTypeNumBits) - 1;  // 0000 0111;
+
+  const uint8_t* end = buffer_ + length_;
+  const uint8_t* pos = current_position_;
+  PERFETTO_DCHECK(pos >= buffer_);
+  PERFETTO_DCHECK(pos <= end);
+
+  uint64_t raw_field_id = 0;
+  pos = ParseVarInt(pos, end, &raw_field_id);
+
+  uint32_t field_id = static_cast<uint32_t>(raw_field_id >> kFieldTypeNumBits);
+  if (field_id == 0 || pos >= end) {
+    return field;
+  }
+  field.type = static_cast<FieldType>(raw_field_id & kFieldTypeMask);
+
+  // Bounds checks compare sizes against the bytes left instead of computing
+  // |pos| + size, which can overflow the pointer when the size is huge.
+  const uint64_t remaining = static_cast<uint64_t>(end - pos);
+  uint64_t field_intvalue = 0;
+  switch (field.type) {
+    case kFieldTypeFixed64: {
+      if (remaining < sizeof(uint64_t)) {
+        return field;
+      }
+      memcpy(&field_intvalue, pos, sizeof(uint64_t));
+      field.int_value = BYTE_SWAP_TO_LE64(field_intvalue);
+      pos += sizeof(uint64_t);
+      break;
+    }
+    case kFieldTypeFixed32: {
+      if (remaining < sizeof(uint32_t)) {
+        return field;
+      }
+      uint32_t tmp;
+      memcpy(&tmp, pos, sizeof(uint32_t));
+      field.int_value = BYTE_SWAP_TO_LE32(tmp);
+      pos += sizeof(uint32_t);
+      break;
+    }
+    case kFieldTypeVarInt: {
+      // A zero byte is checked explicitly because ParseVarInt also yields zero
+      // when it runs out of buffer before the varint is complete.
+      if (*pos == 0) {
+        pos++;
+        field.int_value = 0;
+      } else {
+        pos = ParseVarInt(pos, end, &field.int_value);
+        // Zero means the varint was truncated: return the invalid field and
+        // keep the position so that a future read can read this field.
+        if (field.int_value == 0) {
+          return field;
+        }
+      }
+      break;
+    }
+    case kFieldTypeLengthDelimited: {
+      // Same explicit zero check as above, this time for the length prefix.
+      if (*pos == 0) {
+        field.length_value.data = ++pos;
+        field.length_value.length = 0;
+      } else {
+        pos = ParseVarInt(pos, end, &field_intvalue);
+        // Fail without moving if the length prefix was truncated or if the
+        // payload is longer than what is left in the buffer.
+        if (field_intvalue == 0 ||
+            field_intvalue > static_cast<uint64_t>(end - pos)) {
+          return field;
+        }
+        field.length_value.data = pos;
+        field.length_value.length = field_intvalue;
+        pos += field_intvalue;
+      }
+      break;
+    }
+  }
+  // Mark the field as valid and commit the new position.
+  field.id = field_id;
+  current_position_ = pos;
+  return field;
+}
+
+// Returns true once the current position has reached the end of the buffer.
+bool ProtoDecoder::IsEndOfBuffer() {
+  PERFETTO_DCHECK(current_position_ >= buffer_);
+  return length_ == static_cast<uint64_t>(current_position_ - buffer_);
+}
+
+} // namespace protozero
diff --git a/src/protozero/proto_decoder_unittest.cc b/src/protozero/proto_decoder_unittest.cc
new file mode 100644
index 0000000..fff9dba
--- /dev/null
+++ b/src/protozero/proto_decoder_unittest.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/protozero/proto_decoder.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "perfetto/base/utils.h"
+#include "perfetto/protozero/message.h"
+#include "perfetto/protozero/proto_utils.h"
+#include "src/protozero/scattered_stream_delegate_for_testing.h"
+
+namespace protozero {
+namespace {
+
+using ::testing::_;
+using ::testing::InSequence;
+using ::testing::Invoke;
+using namespace protozero::proto_utils;
+
+TEST(ProtoDecoder, ReadString) {
+  // Serialize a single string field with id 1 through the testing delegate.
+  static constexpr size_t kChunkSize = 512;
+  Message message;
+  perfetto::ScatteredStreamDelegateForTesting delegate(kChunkSize);
+  ScatteredStreamWriter writer(&delegate);
+  delegate.set_writer(&writer);
+  message.Reset(&writer);
+  static constexpr char kTestString[] = "test";
+  message.AppendString(1, kTestString);
+  // Decode only the bytes that were actually written to the first chunk.
+  uint8_t* data = delegate.chunks()[0].get();
+  uint64_t bytes_used = kChunkSize - writer.bytes_available();
+  ProtoDecoder decoder(data, bytes_used);
+  ProtoDecoder::Field field = decoder.ReadField();
+
+  ASSERT_EQ(1u, field.id);
+  ASSERT_EQ(proto_utils::FieldType::kFieldTypeLengthDelimited, field.type);
+  ASSERT_EQ(sizeof(kTestString) - 1, field.length_value.length);
+  for (size_t i = 0; i < sizeof(kTestString) - 1; i++) {
+    ASSERT_EQ(kTestString[i], field.length_value.data[i]);
+  }
+}
+
+TEST(ProtoDecoder, FixedData) {
+  // One encoded field plus the id, type and value expected when decoding it.
+  struct FieldExpectation {
+    const char* encoded;
+    size_t encoded_size;
+    uint32_t id;
+    FieldType type;
+    uint64_t int_value;  // Payload length for length-delimited fields.
+  };
+  const FieldExpectation kFieldExpectations[] = {
+      {"\x08\x00", 2, 1, kFieldTypeVarInt, 0},
+      {"\x08\x42", 2, 1, kFieldTypeVarInt, 0x42},
+      {"\xF8\x07\x42", 3, 127, kFieldTypeVarInt, 0x42},
+      {"\x90\x4D\xFF\xFF\xFF\xFF\x0F", 7, 1234, kFieldTypeVarInt, 0xFFFFFFFF},
+      {"\x7D\x42\x00\x00\x00", 5, 15, kFieldTypeFixed32, 0x42},
+      {"\x95\x4D\x78\x56\x34\x12", 6, 1234, kFieldTypeFixed32, 0x12345678},
+      {"\x79\x42\x00\x00\x00\x00\x00\x00\x00", 9, 15, kFieldTypeFixed64, 0x42},
+      {"\x91\x4D\x08\x07\x06\x05\x04\x03\x02\x01", 10, 1234, kFieldTypeFixed64,
+       0x0102030405060708},
+      {"\x0A\x00", 2, 1, kFieldTypeLengthDelimited, 0},
+      {"\x0A\x04|abc", 6, 1, kFieldTypeLengthDelimited, 4},
+      {"\x92\x4D\x04|abc", 7, 1234, kFieldTypeLengthDelimited, 4},
+      {"\x92\x4D\x83\x01|abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzab"
+       "cdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstu"
+       "vwx",
+       135, 1234, kFieldTypeLengthDelimited, 131},
+  };
+
+  for (const FieldExpectation& exp : kFieldExpectations) {
+    ProtoDecoder decoder(reinterpret_cast<const uint8_t*>(exp.encoded),
+                         exp.encoded_size);
+
+    ProtoDecoder::Field field = decoder.ReadField();
+    ASSERT_EQ(exp.id, field.id);
+    ASSERT_EQ(exp.type, field.type);
+    // One field per vector: a successful read consumes the whole buffer.
+    ASSERT_TRUE(decoder.IsEndOfBuffer());
+    if (field.type == kFieldTypeLengthDelimited) {
+      ASSERT_EQ(exp.int_value, field.length_value.length);
+    } else {
+      ASSERT_EQ(exp.int_value, field.int_value);
+    }
+  }
+}
+
+} // namespace
+} // namespace protozero
diff --git a/src/protozero/proto_utils.cc b/src/protozero/proto_utils.cc
index baf1120..6f1fba3 100644
--- a/src/protozero/proto_utils.cc
+++ b/src/protozero/proto_utils.cc
@@ -23,20 +23,9 @@
#include "perfetto/base/logging.h"
#include "perfetto/base/utils.h"
-#define PERFETTO_CHECK_PTR_LE(a, b) \
- PERFETTO_CHECK(reinterpret_cast<uintptr_t>(a) <= \
- reinterpret_cast<uintptr_t>(b))
-
namespace protozero {
namespace proto_utils {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define BYTE_SWAP_TO_LE32(x) (x)
-#define BYTE_SWAP_TO_LE64(x) (x)
-#else
-#error Unimplemented for big endian archs.
-#endif
-
const uint8_t* ParseVarInt(const uint8_t* start,
const uint8_t* end,
uint64_t* value) {
@@ -55,58 +44,5 @@
return pos;
}
-const uint8_t* ParseField(const uint8_t* start,
- const uint8_t* end,
- uint32_t* field_id,
- FieldType* field_type,
- uint64_t* field_intvalue) {
- // The first byte of a proto field is structured as follows:
- // The least 3 significant bits determine the field type.
- // The most 5 significant bits determine the field id. If MSB == 1, the
- // field id continues on the next bytes following the VarInt encoding.
- const uint8_t kFieldTypeNumBits = 3;
- const uint8_t kFieldTypeMask = (1 << kFieldTypeNumBits) - 1; // 0000 0111;
-
- const uint8_t* pos = start;
- PERFETTO_CHECK_PTR_LE(pos, end - 1);
- *field_type = static_cast<FieldType>(*pos & kFieldTypeMask);
-
- uint64_t raw_field_id;
- pos = ParseVarInt(pos, end, &raw_field_id);
- raw_field_id >>= kFieldTypeNumBits;
-
- PERFETTO_DCHECK(raw_field_id <= std::numeric_limits<uint32_t>::max());
- *field_id = static_cast<uint32_t>(raw_field_id);
-
- switch (*field_type) {
- case kFieldTypeFixed64: {
- PERFETTO_CHECK_PTR_LE(pos + sizeof(uint64_t), end);
- memcpy(field_intvalue, pos, sizeof(uint64_t));
- *field_intvalue = BYTE_SWAP_TO_LE64(*field_intvalue);
- pos += sizeof(uint64_t);
- break;
- }
- case kFieldTypeFixed32: {
- PERFETTO_CHECK_PTR_LE(pos + sizeof(uint32_t), end);
- uint32_t tmp;
- memcpy(&tmp, pos, sizeof(uint32_t));
- *field_intvalue = BYTE_SWAP_TO_LE32(tmp);
- pos += sizeof(uint32_t);
- break;
- }
- case kFieldTypeVarInt: {
- pos = ParseVarInt(pos, end, field_intvalue);
- break;
- }
- case kFieldTypeLengthDelimited: {
- pos = ParseVarInt(pos, end, field_intvalue);
- pos += *field_intvalue;
- PERFETTO_CHECK_PTR_LE(pos, end);
- break;
- }
- }
- return pos;
-}
-
} // namespace proto_utils
} // namespace protozero
diff --git a/src/protozero/proto_utils_unittest.cc b/src/protozero/proto_utils_unittest.cc
index 01adcd3..40296dd 100644
--- a/src/protozero/proto_utils_unittest.cc
+++ b/src/protozero/proto_utils_unittest.cc
@@ -166,51 +166,6 @@
}
}
-TEST(ProtoUtilsTest, FieldDecoding) {
- struct FieldExpectation {
- const char* encoded;
- size_t encoded_size;
- uint32_t id;
- FieldType type;
- uint64_t int_value;
- };
-
- const FieldExpectation kFieldExpectations[] = {
- {"\x08\x00", 2, 1, kFieldTypeVarInt, 0},
- {"\x08\x42", 2, 1, kFieldTypeVarInt, 0x42},
- {"\xF8\x07\x42", 3, 127, kFieldTypeVarInt, 0x42},
- {"\x90\x4D\xFF\xFF\xFF\xFF\x0F", 7, 1234, kFieldTypeVarInt, 0xFFFFFFFF},
- {"\x7D\x42\x00\x00\x00", 5, 15, kFieldTypeFixed32, 0x42},
- {"\x95\x4D\x78\x56\x34\x12", 6, 1234, kFieldTypeFixed32, 0x12345678},
- {"\x79\x42\x00\x00\x00\x00\x00\x00\x00", 9, 15, kFieldTypeFixed64, 0x42},
- {"\x91\x4D\x08\x07\x06\x05\x04\x03\x02\x01", 10, 1234, kFieldTypeFixed64,
- 0x0102030405060708},
- {"\x0A\x00", 2, 1, kFieldTypeLengthDelimited, 0},
- {"\x0A\x04|abc", 6, 1, kFieldTypeLengthDelimited, 4},
- {"\x92\x4D\x04|abc", 7, 1234, kFieldTypeLengthDelimited, 4},
- {"\x92\x4D\x83\x01|abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzab"
- "cdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstu"
- "vwx",
- 135, 1234, kFieldTypeLengthDelimited, 131},
- };
-
- for (size_t i = 0; i < ArraySize(kFieldExpectations); ++i) {
- const FieldExpectation& exp = kFieldExpectations[i];
- FieldType field_type = kFieldTypeVarInt;
- uint32_t field_id = std::numeric_limits<uint32_t>::max();
- uint64_t field_intvalue = std::numeric_limits<uint64_t>::max();
- const uint8_t* res = ParseField(
- reinterpret_cast<const uint8_t*>(exp.encoded),
- reinterpret_cast<const uint8_t*>(exp.encoded + exp.encoded_size),
- &field_id, &field_type, &field_intvalue);
- ASSERT_EQ(reinterpret_cast<const void*>(exp.encoded + exp.encoded_size),
- reinterpret_cast<const void*>(res));
- ASSERT_EQ(exp.id, field_id);
- ASSERT_EQ(exp.type, field_type);
- ASSERT_EQ(exp.int_value, field_intvalue);
- }
-}
-
} // namespace
} // namespace proto_utils
} // namespace protozero