blob: 4f0f9750092803e97420f630f2546b063333f6f3 [file] [log] [blame]
Martin Stjernholm4fb51112021-04-30 11:53:52 +01001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_LIBARTBASE_BASE_LEB128_H_
18#define ART_LIBARTBASE_BASE_LEB128_H_
19
20#include <vector>
21
22#include <android-base/logging.h>
23
24#include "bit_utils.h"
25#include "globals.h"
26#include "macros.h"
27
28namespace art {
29
30// Reads an unsigned LEB128 value, updating the given pointer to point
31// just past the end of the read value. This function tolerates
32// non-zero high-order bits in the fifth encoded byte.
33static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
34 const uint8_t* ptr = *data;
35 int result = *(ptr++);
36 if (UNLIKELY(result > 0x7f)) {
37 int cur = *(ptr++);
38 result = (result & 0x7f) | ((cur & 0x7f) << 7);
39 if (cur > 0x7f) {
40 cur = *(ptr++);
41 result |= (cur & 0x7f) << 14;
42 if (cur > 0x7f) {
43 cur = *(ptr++);
44 result |= (cur & 0x7f) << 21;
45 if (cur > 0x7f) {
46 // Note: We don't check to see if cur is out of range here,
47 // meaning we tolerate garbage in the four high-order bits.
48 cur = *(ptr++);
49 result |= cur << 28;
50 }
51 }
52 }
53 }
54 *data = ptr;
55 return static_cast<uint32_t>(result);
56}
57
58static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) {
59 return DecodeUnsignedLeb128(&data);
60}
61
62static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
63 const void* end,
64 uint32_t* out) {
65 const uint8_t* ptr = *data;
66 if (ptr >= end) {
67 return false;
68 }
69 int result = *(ptr++);
70 if (UNLIKELY(result > 0x7f)) {
71 if (ptr >= end) {
72 return false;
73 }
74 int cur = *(ptr++);
75 result = (result & 0x7f) | ((cur & 0x7f) << 7);
76 if (cur > 0x7f) {
77 if (ptr >= end) {
78 return false;
79 }
80 cur = *(ptr++);
81 result |= (cur & 0x7f) << 14;
82 if (cur > 0x7f) {
83 if (ptr >= end) {
84 return false;
85 }
86 cur = *(ptr++);
87 result |= (cur & 0x7f) << 21;
88 if (cur > 0x7f) {
89 if (ptr >= end) {
90 return false;
91 }
92 // Note: We don't check to see if cur is out of range here,
93 // meaning we tolerate garbage in the four high-order bits.
94 cur = *(ptr++);
95 result |= cur << 28;
96 }
97 }
98 }
99 }
100 *data = ptr;
101 *out = static_cast<uint32_t>(result);
102 return true;
103}
104
105// Reads an unsigned LEB128 + 1 value. updating the given pointer to point
106// just past the end of the read value. This function tolerates
107// non-zero high-order bits in the fifth encoded byte.
108// It is possible for this function to return -1.
109static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
110 return DecodeUnsignedLeb128(data) - 1;
111}
112
113// Reads a signed LEB128 value, updating the given pointer to point
114// just past the end of the read value. This function tolerates
115// non-zero high-order bits in the fifth encoded byte.
116static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
117 const uint8_t* ptr = *data;
118 int32_t result = *(ptr++);
119 if (result <= 0x7f) {
120 result = (result << 25) >> 25;
121 } else {
122 int cur = *(ptr++);
123 result = (result & 0x7f) | ((cur & 0x7f) << 7);
124 if (cur <= 0x7f) {
125 result = (result << 18) >> 18;
126 } else {
127 cur = *(ptr++);
128 result |= (cur & 0x7f) << 14;
129 if (cur <= 0x7f) {
130 result = (result << 11) >> 11;
131 } else {
132 cur = *(ptr++);
133 result |= (cur & 0x7f) << 21;
134 if (cur <= 0x7f) {
135 result = (result << 4) >> 4;
136 } else {
137 // Note: We don't check to see if cur is out of range here,
138 // meaning we tolerate garbage in the four high-order bits.
139 cur = *(ptr++);
140 result |= cur << 28;
141 }
142 }
143 }
144 }
145 *data = ptr;
146 return result;
147}
148
149static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
150 const void* end,
151 int32_t* out) {
152 const uint8_t* ptr = *data;
153 if (ptr >= end) {
154 return false;
155 }
156 int32_t result = *(ptr++);
157 if (result <= 0x7f) {
158 result = (result << 25) >> 25;
159 } else {
160 if (ptr >= end) {
161 return false;
162 }
163 int cur = *(ptr++);
164 result = (result & 0x7f) | ((cur & 0x7f) << 7);
165 if (cur <= 0x7f) {
166 result = (result << 18) >> 18;
167 } else {
168 if (ptr >= end) {
169 return false;
170 }
171 cur = *(ptr++);
172 result |= (cur & 0x7f) << 14;
173 if (cur <= 0x7f) {
174 result = (result << 11) >> 11;
175 } else {
176 if (ptr >= end) {
177 return false;
178 }
179 cur = *(ptr++);
180 result |= (cur & 0x7f) << 21;
181 if (cur <= 0x7f) {
182 result = (result << 4) >> 4;
183 } else {
184 if (ptr >= end) {
185 return false;
186 }
187 // Note: We don't check to see if cur is out of range here,
188 // meaning we tolerate garbage in the four high-order bits.
189 cur = *(ptr++);
190 result |= cur << 28;
191 }
192 }
193 }
194 }
195 *data = ptr;
196 *out = static_cast<uint32_t>(result);
197 return true;
198}
199
200// Returns the number of bytes needed to encode the value in unsigned LEB128.
201static inline uint32_t UnsignedLeb128Size(uint32_t data) {
202 // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1 // 32 - CLZ(data | 1)
203 // bytes = ceil(bits_to_encode / 7.0); // (6 + bits_to_encode) / 7
204 uint32_t x = 6 + 32 - CLZ(data | 1U);
205 // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
206 // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
207 return (x * 37) >> 8;
208}
209
210static inline bool IsLeb128Terminator(const uint8_t* ptr) {
211 return *ptr <= 0x7f;
212}
213
214// Returns the first byte of a Leb128 value assuming that:
215// (1) `end_ptr` points to the first byte after the Leb128 value, and
216// (2) there is another Leb128 value before this one.
217template <typename T>
218static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) {
android-t13d2c5b22022-10-12 13:43:18 +0800219 static_assert(std::is_same_v<std::remove_const_t<T>, uint8_t>,
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100220 "T must be a uint8_t");
221 T* ptr = end_ptr;
222
223 // Move one byte back, check that this is the terminating byte.
224 ptr--;
225 DCHECK(IsLeb128Terminator(ptr));
226
227 // Keep moving back while the previous byte is not a terminating byte.
228 // Fail after reading five bytes in case there isn't another Leb128 value
229 // before this one.
230 while (!IsLeb128Terminator(ptr - 1)) {
231 ptr--;
232 DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5);
233 }
234
235 return ptr;
236}
237
238// Returns the number of bytes needed to encode the value in unsigned LEB128.
239static inline uint32_t SignedLeb128Size(int32_t data) {
240 // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
241 data = data ^ (data >> 31);
242 uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
243 return (x * 37) >> 8;
244}
245
246static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
247 uint8_t out = value & 0x7f;
248 value >>= 7;
249 while (value != 0) {
250 *dest++ = out | 0x80;
251 out = value & 0x7f;
252 value >>= 7;
253 }
254 *dest++ = out;
255 return dest;
256}
257
258template <typename Vector>
259static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
android-t13d2c5b22022-10-12 13:43:18 +0800260 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100261 uint8_t out = value & 0x7f;
262 value >>= 7;
263 while (value != 0) {
264 dest->push_back(out | 0x80);
265 out = value & 0x7f;
266 value >>= 7;
267 }
268 dest->push_back(out);
269}
270
271// Overwrite encoded Leb128 with a new value. The new value must be less than
272// or equal to the old value to ensure that it fits the allocated space.
273static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
274 const uint8_t* old_end = dest;
275 uint32_t old_value = DecodeUnsignedLeb128(&old_end);
276 DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value));
277 for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
278 // Use longer encoding than necessary to fill the allocated space.
279 end[-1] |= 0x80;
280 end[0] = 0;
281 }
282}
283
284static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
285 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
286 uint8_t out = value & 0x7f;
287 while (extra_bits != 0u) {
288 *dest++ = out | 0x80;
289 value >>= 7;
290 out = value & 0x7f;
291 extra_bits >>= 7;
292 }
293 *dest++ = out;
294 return dest;
295}
296
297template<typename Vector>
298static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
android-t13d2c5b22022-10-12 13:43:18 +0800299 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100300 uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
301 uint8_t out = value & 0x7f;
302 while (extra_bits != 0u) {
303 dest->push_back(out | 0x80);
304 value >>= 7;
305 out = value & 0x7f;
306 extra_bits >>= 7;
307 }
308 dest->push_back(out);
309}
310
311// An encoder that pushes int32_t/uint32_t data onto the given std::vector.
312template <typename Vector = std::vector<uint8_t>>
313class Leb128Encoder {
android-t13d2c5b22022-10-12 13:43:18 +0800314 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100315
316 public:
317 explicit Leb128Encoder(Vector* data) : data_(data) {
318 DCHECK(data != nullptr);
319 }
320
321 void Reserve(uint32_t size) {
322 data_->reserve(size);
323 }
324
325 void PushBackUnsigned(uint32_t value) {
326 EncodeUnsignedLeb128(data_, value);
327 }
328
329 template<typename It>
330 void InsertBackUnsigned(It cur, It end) {
331 for (; cur != end; ++cur) {
332 PushBackUnsigned(*cur);
333 }
334 }
335
336 void PushBackSigned(int32_t value) {
337 EncodeSignedLeb128(data_, value);
338 }
339
340 template<typename It>
341 void InsertBackSigned(It cur, It end) {
342 for (; cur != end; ++cur) {
343 PushBackSigned(*cur);
344 }
345 }
346
347 const Vector& GetData() const {
348 return *data_;
349 }
350
351 protected:
352 Vector* const data_;
353
354 private:
355 DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
356};
357
358// An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
359template <typename Vector = std::vector<uint8_t>>
360class Leb128EncodingVector final : private Vector,
361 public Leb128Encoder<Vector> {
android-t13d2c5b22022-10-12 13:43:18 +0800362 static_assert(std::is_same_v<typename Vector::value_type, uint8_t>, "Invalid value type");
Martin Stjernholm4fb51112021-04-30 11:53:52 +0100363
364 public:
365 Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
366
367 explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
368 : Vector(alloc),
369 Leb128Encoder<Vector>(this) { }
370
371 private:
372 DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
373};
374
375} // namespace art
376
377#endif // ART_LIBARTBASE_BASE_LEB128_H_