blob: f9de2e1143fb71451a7e7bbbd04f9d30190dfd90 [file] [log] [blame]
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001// Copyright 2015 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_WASM_DECODER_H_
6#define V8_WASM_DECODER_H_
7
8#include "src/base/smart-pointers.h"
9#include "src/flags.h"
10#include "src/signature.h"
11#include "src/wasm/wasm-result.h"
12#include "src/zone-containers.h"
13
14namespace v8 {
15namespace internal {
16namespace wasm {
17
// Decoder tracing. Outside of DEBUG builds TRACE(...) expands to nothing, so
// its arguments are never evaluated. In DEBUG builds the arguments are
// forwarded to PrintF, but only while FLAG_trace_wasm_decoder is set.
#if !DEBUG
#define TRACE(...)
#else
#define TRACE(...)                                    \
  do {                                                \
    if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \
  } while (false)
#endif
26
// UNALIGNED_ACCESS_OK is 0 when targeting MIPS, MIPS64 or ARM and 1 for every
// other target. The fixed-width readers below consult it to choose between a
// direct load through a cast pointer and byte-by-byte assembly.
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_ARM
#define UNALIGNED_ACCESS_OK 0
#else
#define UNALIGNED_ACCESS_OK 1
#endif
32
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000033// A helper utility to decode bytes, integers, fields, varints, etc, from
34// a buffer of bytes.
35class Decoder {
36 public:
37 Decoder(const byte* start, const byte* end)
38 : start_(start),
39 pc_(start),
40 limit_(end),
Ben Murdoch097c5b22016-05-18 11:27:45 +010041 end_(end),
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000042 error_pc_(nullptr),
43 error_pt_(nullptr) {}
44
45 virtual ~Decoder() {}
46
Ben Murdoch097c5b22016-05-18 11:27:45 +010047 inline bool check(const byte* base, int offset, int length, const char* msg) {
48 DCHECK_GE(base, start_);
49 if ((base + offset + length) > limit_) {
50 error(base, base + offset, msg);
51 return false;
52 }
53 return true;
54 }
55
56 // Reads a single 8-bit byte, reporting an error if out of bounds.
57 inline uint8_t checked_read_u8(const byte* base, int offset,
58 const char* msg = "expected 1 byte") {
59 return check(base, offset, 1, msg) ? base[offset] : 0;
60 }
61
62 // Reads 16-bit word, reporting an error if out of bounds.
63 inline uint16_t checked_read_u16(const byte* base, int offset,
64 const char* msg = "expected 2 bytes") {
65 return check(base, offset, 2, msg) ? read_u16(base + offset) : 0;
66 }
67
68 // Reads 32-bit word, reporting an error if out of bounds.
69 inline uint32_t checked_read_u32(const byte* base, int offset,
70 const char* msg = "expected 4 bytes") {
71 return check(base, offset, 4, msg) ? read_u32(base + offset) : 0;
72 }
73
74 // Reads 64-bit word, reporting an error if out of bounds.
75 inline uint64_t checked_read_u64(const byte* base, int offset,
76 const char* msg = "expected 8 bytes") {
77 return check(base, offset, 8, msg) ? read_u64(base + offset) : 0;
78 }
79
Ben Murdochda12d292016-06-02 14:46:10 +010080 // Reads a variable-length unsigned integer (little endian).
Ben Murdoch097c5b22016-05-18 11:27:45 +010081 uint32_t checked_read_u32v(const byte* base, int offset, int* length,
Ben Murdochda12d292016-06-02 14:46:10 +010082 const char* msg = "expected LEB32") {
83 return checked_read_leb<uint32_t, false>(base, offset, length, msg);
84 }
Ben Murdoch097c5b22016-05-18 11:27:45 +010085
Ben Murdochda12d292016-06-02 14:46:10 +010086 // Reads a variable-length signed integer (little endian).
87 int32_t checked_read_i32v(const byte* base, int offset, int* length,
88 const char* msg = "expected SLEB32") {
89 uint32_t result =
90 checked_read_leb<uint32_t, true>(base, offset, length, msg);
91 if (*length == 5) return bit_cast<int32_t>(result);
92 if (*length > 0) {
93 int shift = 32 - 7 * *length;
94 // Perform sign extension.
95 return bit_cast<int32_t>(result << shift) >> shift;
Ben Murdoch097c5b22016-05-18 11:27:45 +010096 }
Ben Murdochda12d292016-06-02 14:46:10 +010097 return 0;
98 }
99
100 // Reads a variable-length unsigned integer (little endian).
101 uint64_t checked_read_u64v(const byte* base, int offset, int* length,
102 const char* msg = "expected LEB64") {
103 return checked_read_leb<uint64_t, false>(base, offset, length, msg);
104 }
105
106 // Reads a variable-length signed integer (little endian).
107 int64_t checked_read_i64v(const byte* base, int offset, int* length,
108 const char* msg = "expected SLEB64") {
109 uint64_t result =
110 checked_read_leb<uint64_t, true>(base, offset, length, msg);
111 if (*length == 10) return bit_cast<int64_t>(result);
112 if (*length > 0) {
113 int shift = 64 - 7 * *length;
114 // Perform sign extension.
115 return bit_cast<int64_t>(result << shift) >> shift;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100116 }
Ben Murdochda12d292016-06-02 14:46:10 +0100117 return 0;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100118 }
119
120 // Reads a single 16-bit unsigned integer (little endian).
121 inline uint16_t read_u16(const byte* ptr) {
122 DCHECK(ptr >= start_ && (ptr + 2) <= end_);
123#if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK
124 return *reinterpret_cast<const uint16_t*>(ptr);
125#else
126 uint16_t b0 = ptr[0];
127 uint16_t b1 = ptr[1];
128 return (b1 << 8) | b0;
129#endif
130 }
131
132 // Reads a single 32-bit unsigned integer (little endian).
133 inline uint32_t read_u32(const byte* ptr) {
134 DCHECK(ptr >= start_ && (ptr + 4) <= end_);
135#if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK
136 return *reinterpret_cast<const uint32_t*>(ptr);
137#else
138 uint32_t b0 = ptr[0];
139 uint32_t b1 = ptr[1];
140 uint32_t b2 = ptr[2];
141 uint32_t b3 = ptr[3];
142 return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
143#endif
144 }
145
146 // Reads a single 64-bit unsigned integer (little endian).
147 inline uint64_t read_u64(const byte* ptr) {
148 DCHECK(ptr >= start_ && (ptr + 8) <= end_);
149#if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK
150 return *reinterpret_cast<const uint64_t*>(ptr);
151#else
152 uint32_t b0 = ptr[0];
153 uint32_t b1 = ptr[1];
154 uint32_t b2 = ptr[2];
155 uint32_t b3 = ptr[3];
156 uint32_t low = (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
157 uint32_t b4 = ptr[4];
158 uint32_t b5 = ptr[5];
159 uint32_t b6 = ptr[6];
160 uint32_t b7 = ptr[7];
161 uint64_t high = (b7 << 24) | (b6 << 16) | (b5 << 8) | b4;
162 return (high << 32) | low;
163#endif
164 }
165
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000166 // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
Ben Murdoch097c5b22016-05-18 11:27:45 +0100167 uint8_t consume_u8(const char* name = nullptr) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000168 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
169 name ? name : "uint8_t");
170 if (checkAvailable(1)) {
171 byte val = *(pc_++);
172 TRACE("%02x = %d\n", val, val);
173 return val;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000174 }
Ben Murdoch097c5b22016-05-18 11:27:45 +0100175 return traceOffEnd<uint8_t>();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000176 }
177
178 // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
Ben Murdoch097c5b22016-05-18 11:27:45 +0100179 uint16_t consume_u16(const char* name = nullptr) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000180 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
181 name ? name : "uint16_t");
182 if (checkAvailable(2)) {
Ben Murdoch097c5b22016-05-18 11:27:45 +0100183 uint16_t val = read_u16(pc_);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000184 TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val);
185 pc_ += 2;
186 return val;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000187 }
Ben Murdoch097c5b22016-05-18 11:27:45 +0100188 return traceOffEnd<uint16_t>();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000189 }
190
191 // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
Ben Murdoch097c5b22016-05-18 11:27:45 +0100192 uint32_t consume_u32(const char* name = nullptr) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000193 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
194 name ? name : "uint32_t");
195 if (checkAvailable(4)) {
Ben Murdoch097c5b22016-05-18 11:27:45 +0100196 uint32_t val = read_u32(pc_);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000197 TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val);
198 pc_ += 4;
199 return val;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000200 }
Ben Murdoch097c5b22016-05-18 11:27:45 +0100201 return traceOffEnd<uint32_t>();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000202 }
203
204 // Reads a LEB128 variable-length 32-bit integer and advances {pc_}.
Ben Murdoch097c5b22016-05-18 11:27:45 +0100205 uint32_t consume_u32v(int* length, const char* name = nullptr) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000206 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_),
207 name ? name : "varint");
208
Ben Murdoch097c5b22016-05-18 11:27:45 +0100209 if (checkAvailable(1)) {
210 const byte* pos = pc_;
211 const byte* end = pc_ + 5;
212 if (end > limit_) end = limit_;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000213
Ben Murdoch097c5b22016-05-18 11:27:45 +0100214 uint32_t result = 0;
215 int shift = 0;
216 byte b = 0;
217 while (pc_ < end) {
218 b = *pc_++;
219 TRACE("%02x ", b);
220 result = result | ((b & 0x7F) << shift);
221 if ((b & 0x80) == 0) break;
222 shift += 7;
223 }
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000224
Ben Murdoch097c5b22016-05-18 11:27:45 +0100225 *length = static_cast<int>(pc_ - pos);
226 if (pc_ == end && (b & 0x80)) {
227 error(pc_ - 1, "varint too large");
Ben Murdochda12d292016-06-02 14:46:10 +0100228 } else if (*length == 0) {
229 error(pc_, "varint of length 0");
Ben Murdoch097c5b22016-05-18 11:27:45 +0100230 } else {
231 TRACE("= %u\n", result);
232 }
233 return result;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000234 }
Ben Murdoch097c5b22016-05-18 11:27:45 +0100235 return traceOffEnd<uint32_t>();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000236 }
237
Ben Murdochda12d292016-06-02 14:46:10 +0100238 // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
239 void consume_bytes(int size) {
240 if (checkAvailable(size)) {
241 pc_ += size;
242 } else {
243 pc_ = limit_;
244 }
245 }
246
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000247 // Check that at least {size} bytes exist between {pc_} and {limit_}.
248 bool checkAvailable(int size) {
Ben Murdochda12d292016-06-02 14:46:10 +0100249 intptr_t pc_overflow_value = std::numeric_limits<intptr_t>::max() - size;
250 if (size < 0 || (intptr_t)pc_ > pc_overflow_value) {
251 error(pc_, nullptr, "reading %d bytes would underflow/overflow", size);
252 return false;
253 } else if (pc_ < start_ || limit_ < (pc_ + size)) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000254 error(pc_, nullptr, "expected %d bytes, fell off end", size);
255 return false;
256 } else {
257 return true;
258 }
259 }
260
261 void error(const char* msg) { error(pc_, nullptr, msg); }
262
263 void error(const byte* pc, const char* msg) { error(pc, nullptr, msg); }
264
265 // Sets internal error state.
266 void error(const byte* pc, const byte* pt, const char* format, ...) {
267 if (ok()) {
268#if DEBUG
269 if (FLAG_wasm_break_on_decoder_error) {
270 base::OS::DebugBreak();
271 }
272#endif
273 const int kMaxErrorMsg = 256;
274 char* buffer = new char[kMaxErrorMsg];
275 va_list arguments;
276 va_start(arguments, format);
277 base::OS::VSNPrintF(buffer, kMaxErrorMsg - 1, format, arguments);
278 va_end(arguments);
279 error_msg_.Reset(buffer);
280 error_pc_ = pc;
281 error_pt_ = pt;
282 onFirstError();
283 }
284 }
285
286 // Behavior triggered on first error, overridden in subclasses.
287 virtual void onFirstError() {}
288
289 // Debugging helper to print bytes up to the end.
290 template <typename T>
291 T traceOffEnd() {
292 T t = 0;
293 for (const byte* ptr = pc_; ptr < limit_; ptr++) {
294 TRACE("%02x ", *ptr);
295 }
296 TRACE("<end>\n");
297 pc_ = limit_;
298 return t;
299 }
300
301 // Converts the given value to a {Result}, copying the error if necessary.
302 template <typename T>
303 Result<T> toResult(T val) {
304 Result<T> result;
305 if (error_pc_) {
Ben Murdochda12d292016-06-02 14:46:10 +0100306 TRACE("Result error: %s\n", error_msg_.get());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000307 result.error_code = kError;
308 result.start = start_;
309 result.error_pc = error_pc_;
310 result.error_pt = error_pt_;
Ben Murdochda12d292016-06-02 14:46:10 +0100311 // transfer ownership of the error to the result.
312 result.error_msg.Reset(error_msg_.Detach());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000313 } else {
314 result.error_code = kSuccess;
315 }
316 result.val = val;
317 return result;
318 }
319
320 // Resets the boundaries of this decoder.
321 void Reset(const byte* start, const byte* end) {
322 start_ = start;
323 pc_ = start;
324 limit_ = end;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100325 end_ = end;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000326 error_pc_ = nullptr;
327 error_pt_ = nullptr;
328 error_msg_.Reset(nullptr);
329 }
330
331 bool ok() const { return error_pc_ == nullptr; }
Ben Murdochda12d292016-06-02 14:46:10 +0100332 bool failed() const { return !error_msg_.is_empty(); }
333 bool more() const { return pc_ < limit_; }
334
335 const byte* start() { return start_; }
336 const byte* pc() { return pc_; }
337 uint32_t pc_offset() { return static_cast<uint32_t>(pc_ - start_); }
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000338
339 protected:
340 const byte* start_;
341 const byte* pc_;
342 const byte* limit_;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100343 const byte* end_;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000344 const byte* error_pc_;
345 const byte* error_pt_;
346 base::SmartArrayPointer<char> error_msg_;
Ben Murdochda12d292016-06-02 14:46:10 +0100347
348 private:
349 template <typename IntType, bool is_signed>
350 IntType checked_read_leb(const byte* base, int offset, int* length,
351 const char* msg) {
352 if (!check(base, offset, 1, msg)) {
353 *length = 0;
354 return 0;
355 }
356
357 const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
358 const byte* ptr = base + offset;
359 const byte* end = ptr + kMaxLength;
360 if (end > limit_) end = limit_;
361 int shift = 0;
362 byte b = 0;
363 IntType result = 0;
364 while (ptr < end) {
365 b = *ptr++;
366 result = result | (static_cast<IntType>(b & 0x7F) << shift);
367 if ((b & 0x80) == 0) break;
368 shift += 7;
369 }
370 DCHECK_LE(ptr - (base + offset), kMaxLength);
371 *length = static_cast<int>(ptr - (base + offset));
372 if (ptr == end) {
373 // Check there are no bits set beyond the bitwidth of {IntType}.
374 const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8);
375 const byte kExtraBitsMask =
376 static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF);
377 int extra_bits_value;
378 if (is_signed) {
379 // A signed-LEB128 must sign-extend the final byte, excluding its
380 // most-signifcant bit. e.g. for a 32-bit LEB128:
381 // kExtraBits = 4
382 // kExtraBitsMask = 0xf0
383 // If b is 0x0f, the value is negative, so extra_bits_value is 0x70.
384 // If b is 0x03, the value is positive, so extra_bits_value is 0x00.
385 extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) &
386 kExtraBitsMask & ~0x80;
387 } else {
388 extra_bits_value = 0;
389 }
390 if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) {
391 error(base, ptr, "extra bits in varint");
392 return 0;
393 }
394 if ((b & 0x80) != 0) {
395 error(base, ptr, msg);
396 return 0;
397 }
398 }
399 return result;
400 }
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000401};
402
403#undef TRACE
404} // namespace wasm
405} // namespace internal
406} // namespace v8
407
408#endif // V8_WASM_DECODER_H_