Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "perfetto/protozero/proto_decoder.h" |
| 18 | |
| 19 | #include <string.h> |
Primiano Tucci | 34a0666 | 2021-06-24 10:17:47 +0100 | [diff] [blame] | 20 | |
Primiano Tucci | 58d2dc6 | 2021-06-24 16:03:24 +0100 | [diff] [blame] | 21 | #include <cinttypes> |
Primiano Tucci | 2272792 | 2019-06-01 10:07:42 +0100 | [diff] [blame] | 22 | #include <limits> |
Primiano Tucci | 34a0666 | 2021-06-24 10:17:47 +0100 | [diff] [blame] | 23 | #include <memory> |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 24 | |
Primiano Tucci | 59a00fe | 2021-01-02 12:58:02 +0100 | [diff] [blame] | 25 | #include "perfetto/base/compiler.h" |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 26 | #include "perfetto/base/logging.h" |
Primiano Tucci | 2c5488f | 2019-06-01 03:27:28 +0100 | [diff] [blame] | 27 | #include "perfetto/ext/base/utils.h" |
Primiano Tucci | f6a3cb0 | 2018-07-02 15:38:50 +0100 | [diff] [blame] | 28 | #include "perfetto/protozero/proto_utils.h" |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 29 | |
| 30 | namespace protozero { |
| 31 | |
| 32 | using namespace proto_utils; |
| 33 | |
Primiano Tucci | 59a00fe | 2021-01-02 12:58:02 +0100 | [diff] [blame] | 34 | #if !PERFETTO_IS_LITTLE_ENDIAN() |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 35 | #error Unimplemented for big endian archs. |
| 36 | #endif |
| 37 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 38 | namespace { |
| 39 | |
| 40 | struct ParseFieldResult { |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 41 | enum ParseResult { kAbort, kSkip, kOk }; |
| 42 | ParseResult parse_res; |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 43 | const uint8_t* next; |
| 44 | Field field; |
| 45 | }; |
| 46 | |
| 47 | // Parses one field and returns the field itself and a pointer to the next |
| 48 | // field to parse. If parsing fails, the returned |next| == |buffer|. |
| 49 | PERFETTO_ALWAYS_INLINE ParseFieldResult |
| 50 | ParseOneField(const uint8_t* const buffer, const uint8_t* const end) { |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 51 | ParseFieldResult res{ParseFieldResult::kAbort, buffer, Field{}}; |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 52 | |
| 53 | // The first byte of a proto field is structured as follows: |
| 54 | // The least 3 significant bits determine the field type. |
| 55 | // The most 5 significant bits determine the field id. If MSB == 1, the |
| 56 | // field id continues on the next bytes following the VarInt encoding. |
| 57 | const uint8_t kFieldTypeNumBits = 3; |
| 58 | const uint64_t kFieldTypeMask = (1 << kFieldTypeNumBits) - 1; // 0000 0111; |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 59 | const uint8_t* pos = buffer; |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 60 | |
Lalit Maganti | df3e926 | 2018-06-04 17:45:00 +0100 | [diff] [blame] | 61 | // If we've already hit the end, just return an invalid field. |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 62 | if (PERFETTO_UNLIKELY(pos >= end)) |
| 63 | return res; |
Lalit Maganti | df3e926 | 2018-06-04 17:45:00 +0100 | [diff] [blame] | 64 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 65 | uint64_t preamble = 0; |
Ryan | cbd1615 | 2019-09-04 16:54:45 +0100 | [diff] [blame] | 66 | if (PERFETTO_LIKELY(*pos < 0x80)) { // Fastpath for fields with ID < 16. |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 67 | preamble = *(pos++); |
Primiano Tucci | de2476b | 2018-08-23 00:53:20 +0200 | [diff] [blame] | 68 | } else { |
Primiano Tucci | b3e08d0 | 2019-11-19 11:10:11 +0000 | [diff] [blame] | 69 | const uint8_t* next = ParseVarInt(pos, end, &preamble); |
| 70 | if (PERFETTO_UNLIKELY(pos == next)) |
| 71 | return res; |
| 72 | pos = next; |
Primiano Tucci | de2476b | 2018-08-23 00:53:20 +0200 | [diff] [blame] | 73 | } |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 74 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 75 | uint32_t field_id = static_cast<uint32_t>(preamble >> kFieldTypeNumBits); |
| 76 | if (field_id == 0 || pos >= end) |
| 77 | return res; |
Primiano Tucci | de2476b | 2018-08-23 00:53:20 +0200 | [diff] [blame] | 78 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 79 | auto field_type = static_cast<uint8_t>(preamble & kFieldTypeMask); |
| 80 | const uint8_t* new_pos = pos; |
| 81 | uint64_t int_value = 0; |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 82 | uint64_t size = 0; |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 83 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 84 | switch (field_type) { |
| 85 | case static_cast<uint8_t>(ProtoWireType::kVarInt): { |
| 86 | new_pos = ParseVarInt(pos, end, &int_value); |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 87 | |
Lalit Maganti | f1c27b1 | 2018-06-05 12:45:29 +0100 | [diff] [blame] | 88 | // new_pos not being greater than pos means ParseVarInt could not fully |
| 89 | // parse the number. This is because we are out of space in the buffer. |
| 90 | // Set the id to zero and return but don't update the offset so a future |
| 91 | // read can read this field. |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 92 | if (PERFETTO_UNLIKELY(new_pos == pos)) |
| 93 | return res; |
| 94 | |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 95 | break; |
| 96 | } |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 97 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 98 | case static_cast<uint8_t>(ProtoWireType::kLengthDelimited): { |
| 99 | uint64_t payload_length; |
| 100 | new_pos = ParseVarInt(pos, end, &payload_length); |
| 101 | if (PERFETTO_UNLIKELY(new_pos == pos)) |
| 102 | return res; |
| 103 | |
| 104 | // ParseVarInt guarantees that |new_pos| <= |end| when it succeeds; |
| 105 | if (payload_length > static_cast<uint64_t>(end - new_pos)) |
| 106 | return res; |
Hector Dearman | cf4ef4b | 2019-02-27 19:10:21 +0000 | [diff] [blame] | 107 | |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 108 | const uintptr_t payload_start = reinterpret_cast<uintptr_t>(new_pos); |
| 109 | int_value = payload_start; |
| 110 | size = payload_length; |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 111 | new_pos += payload_length; |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 112 | break; |
| 113 | } |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 114 | |
| 115 | case static_cast<uint8_t>(ProtoWireType::kFixed64): { |
| 116 | new_pos = pos + sizeof(uint64_t); |
| 117 | if (PERFETTO_UNLIKELY(new_pos > end)) |
| 118 | return res; |
| 119 | memcpy(&int_value, pos, sizeof(uint64_t)); |
Primiano Tucci | de2476b | 2018-08-23 00:53:20 +0200 | [diff] [blame] | 120 | break; |
| 121 | } |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 122 | |
| 123 | case static_cast<uint8_t>(ProtoWireType::kFixed32): { |
| 124 | new_pos = pos + sizeof(uint32_t); |
| 125 | if (PERFETTO_UNLIKELY(new_pos > end)) |
| 126 | return res; |
| 127 | memcpy(&int_value, pos, sizeof(uint32_t)); |
| 128 | break; |
| 129 | } |
| 130 | |
| 131 | default: |
| 132 | PERFETTO_DLOG("Invalid proto field type: %u", field_type); |
| 133 | return res; |
| 134 | } |
| 135 | |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 136 | res.next = new_pos; |
| 137 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 138 | if (PERFETTO_UNLIKELY(field_id > std::numeric_limits<uint16_t>::max())) { |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 139 | PERFETTO_DLOG("Skipping field %" PRIu32 " because its id > 0xFFFF", |
| 140 | field_id); |
| 141 | res.parse_res = ParseFieldResult::kSkip; |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 142 | return res; |
| 143 | } |
| 144 | |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 145 | if (PERFETTO_UNLIKELY(size > proto_utils::kMaxMessageLength)) { |
| 146 | PERFETTO_DLOG("Skipping field %" PRIu32 " because it's too big (%" PRIu64 |
| 147 | " KB)", |
| 148 | field_id, size / 1024); |
| 149 | res.parse_res = ParseFieldResult::kSkip; |
| 150 | return res; |
| 151 | } |
| 152 | |
| 153 | res.parse_res = ParseFieldResult::kOk; |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 154 | res.field.initialize(static_cast<uint16_t>(field_id), field_type, int_value, |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 155 | static_cast<uint32_t>(size)); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 156 | return res; |
| 157 | } |
| 158 | |
| 159 | } // namespace |
| 160 | |
| 161 | Field ProtoDecoder::FindField(uint32_t field_id) { |
Eric Seckler | d012597 | 2019-04-29 09:49:22 +0000 | [diff] [blame] | 162 | Field res{}; |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 163 | auto old_position = read_ptr_; |
| 164 | read_ptr_ = begin_; |
| 165 | for (auto f = ReadField(); f.valid(); f = ReadField()) { |
| 166 | if (f.id() == field_id) { |
| 167 | res = f; |
Primiano Tucci | de2476b | 2018-08-23 00:53:20 +0200 | [diff] [blame] | 168 | break; |
| 169 | } |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 170 | } |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 171 | read_ptr_ = old_position; |
| 172 | return res; |
| 173 | } |
| 174 | |
| 175 | PERFETTO_ALWAYS_INLINE |
| 176 | Field ProtoDecoder::ReadField() { |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 177 | ParseFieldResult res; |
| 178 | do { |
| 179 | res = ParseOneField(read_ptr_, end_); |
| 180 | read_ptr_ = res.next; |
| 181 | } while (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kSkip)); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 182 | return res.field; |
| 183 | } |
| 184 | |
| 185 | void TypedProtoDecoderBase::ParseAllFields() { |
| 186 | const uint8_t* cur = begin_; |
| 187 | ParseFieldResult res; |
| 188 | for (;;) { |
| 189 | res = ParseOneField(cur, end_); |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 190 | PERFETTO_DCHECK(res.parse_res != ParseFieldResult::kOk || res.next != cur); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 191 | cur = res.next; |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 192 | if (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kSkip)) |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 193 | continue; |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 194 | if (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kAbort)) |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 195 | break; |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 196 | |
Primiano Tucci | 2cfc7fd | 2019-11-28 16:07:27 +0000 | [diff] [blame] | 197 | PERFETTO_DCHECK(res.parse_res == ParseFieldResult::kOk); |
| 198 | PERFETTO_DCHECK(res.field.valid()); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 199 | auto field_id = res.field.id(); |
Hector Dearman | c7c267a | 2019-03-25 13:06:21 +0000 | [diff] [blame] | 200 | if (PERFETTO_UNLIKELY(field_id >= num_fields_)) |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 201 | continue; |
| 202 | |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 203 | // There are two reasons why we might want to expand the heap capacity: |
| 204 | // 1. We are writing a non-repeated field, which has an id > |
| 205 | // INITIAL_STACK_CAPACITY. In this case ExpandHeapStorage() ensures to |
| 206 | // allocate at least (num_fields_ + 1) slots. |
| 207 | // 2. We are writing a repeated field but ran out of capacity. |
| 208 | if (PERFETTO_UNLIKELY(field_id >= size_ || size_ >= capacity_)) |
| 209 | ExpandHeapStorage(); |
| 210 | |
| 211 | PERFETTO_DCHECK(field_id < size_); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 212 | Field* fld = &fields_[field_id]; |
| 213 | if (PERFETTO_LIKELY(!fld->valid())) { |
| 214 | // This is the first time we see this field. |
| 215 | *fld = std::move(res.field); |
| 216 | } else { |
Hector Dearman | c7c267a | 2019-03-25 13:06:21 +0000 | [diff] [blame] | 217 | // Repeated field case. |
| 218 | // In this case we need to: |
| 219 | // 1. Append the last value of the field to end of the repeated field |
| 220 | // storage. |
| 221 | // 2. Replace the default instance at offset |field_id| with the current |
| 222 | // value. This is because in case of repeated field a call to Get(X) is |
| 223 | // supposed to return the last value of X, not the first one. |
| 224 | // This is so that the RepeatedFieldIterator will iterate in the right |
| 225 | // order, see comments on RepeatedFieldIterator. |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 226 | PERFETTO_DCHECK(size_ < capacity_); |
Hector Dearman | c7c267a | 2019-03-25 13:06:21 +0000 | [diff] [blame] | 227 | fields_[size_++] = *fld; |
| 228 | *fld = std::move(res.field); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 229 | } |
| 230 | } |
| 231 | read_ptr_ = res.next; |
| 232 | } |
| 233 | |
| 234 | void TypedProtoDecoderBase::ExpandHeapStorage() { |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 235 | // When we expand the heap we must ensure that we have at very last capacity |
Primiano Tucci | 84b1d1f | 2021-11-10 22:13:12 +0000 | [diff] [blame] | 236 | // to deal with all known fields plus at least one repeated field. We go +2048 |
| 237 | // here based on observations on a large 4GB android trace. This is to avoid |
| 238 | // trivial re-allocations when dealing with repeated fields of a message that |
| 239 | // has > INITIAL_STACK_CAPACITY fields. |
| 240 | const uint32_t min_capacity = num_fields_ + 2048; // Any num >= +1 will do. |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 241 | const uint32_t new_capacity = std::max(capacity_ * 2, min_capacity); |
| 242 | PERFETTO_CHECK(new_capacity > size_ && new_capacity > num_fields_); |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 243 | std::unique_ptr<Field[]> new_storage(new Field[new_capacity]); |
| 244 | |
Primiano Tucci | 84b1d1f | 2021-11-10 22:13:12 +0000 | [diff] [blame] | 245 | static_assert(std::is_trivially_constructible<Field>::value, |
| 246 | "Field must be trivially constructible"); |
Lalit Maganti | ea0254c | 2019-11-15 21:58:46 +0000 | [diff] [blame] | 247 | static_assert(std::is_trivially_copyable<Field>::value, |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 248 | "Field must be trivially copyable"); |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 249 | |
| 250 | // Zero-initialize the slots for known field IDs slots, as they can be |
| 251 | // randomly accessed. Instead, there is no need to initialize the repeated |
| 252 | // slots, because they are written linearly with no gaps and are always |
| 253 | // initialized before incrementing |size_|. |
| 254 | const uint32_t new_size = std::max(size_, num_fields_); |
| 255 | memset(&new_storage[size_], 0, sizeof(Field) * (new_size - size_)); |
| 256 | |
Primiano Tucci | c167887 | 2019-03-20 11:30:54 +0000 | [diff] [blame] | 257 | memcpy(&new_storage[0], fields_, sizeof(Field) * size_); |
| 258 | |
| 259 | heap_storage_ = std::move(new_storage); |
| 260 | fields_ = &heap_storage_[0]; |
| 261 | capacity_ = new_capacity; |
Primiano Tucci | 343fb7a | 2021-10-19 15:39:38 +0100 | [diff] [blame] | 262 | size_ = new_size; |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 263 | } |
| 264 | |
Lalit Maganti | 45172a8 | 2018-06-01 03:04:43 +0100 | [diff] [blame] | 265 | } // namespace protozero |