blob: 3ac9c91d4119e9f5675f15c789a7bdb8a0d9786a [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5// Package wire parses and formats the protobuf wire encoding.
6//
7// See https://developers.google.com/protocol-buffers/docs/encoding.
8package wire
9
10import (
Joe Tsai22505a42018-08-01 13:12:49 -070011 "io"
12 "math/bits"
Joe Tsai0e6baaa2018-08-03 17:55:44 -070013
14 "google.golang.org/proto/internal/errors"
Joe Tsai22505a42018-08-01 13:12:49 -070015)
16
// Number is a protobuf field number.
type Number int32

// Bounds used by IsValid. FirstReservedNumber and LastReservedNumber delimit
// an inclusive range of numbers that IsValid rejects.
const (
	MinValidNumber      Number = 1
	FirstReservedNumber Number = 19000
	LastReservedNumber  Number = 19999
	MaxValidNumber      Number = 1<<29 - 1
)

// IsValid reports whether the field number is semantically valid, that is,
// whether it lies within [MinValidNumber, MaxValidNumber] and outside of
// [FirstReservedNumber, LastReservedNumber].
//
// Note that while numbers within the reserved range are semantically invalid,
// they are syntactically valid in the wire format.
// Implementations may treat records with reserved field numbers as unknown.
func (n Number) IsValid() bool {
	if n < MinValidNumber || MaxValidNumber < n {
		return false
	}
	reserved := FirstReservedNumber <= n && n <= LastReservedNumber
	return !reserved
}
35
// Type is the wire type carried in the low bits of a tag.
type Type int8

// Wire types, listed in numeric order.
const (
	VarintType     Type = 0 // value is a varint
	Fixed64Type    Type = 1 // value is 8 little-endian bytes
	BytesType      Type = 2 // value is a length-prefixed byte string
	StartGroupType Type = 3 // opens a group
	EndGroupType   Type = 4 // closes a group
	Fixed32Type    Type = 5 // value is 4 little-endian bytes
)
47
48const (
49 _ = -iota
50 errCodeTruncated
51 errCodeFieldNumber
52 errCodeOverflow
53 errCodeReserved
54 errCodeEndGroup
55)
56
57var (
58 errFieldNumber = errors.New("invalid field number")
59 errOverflow = errors.New("variable length integer overflow")
60 errReserved = errors.New("cannot parse reserved wire type")
61 errEndGroup = errors.New("mismatching end group marker")
62 errParse = errors.New("parse error")
63)
64
65// ParseError converts an error code into an error value.
66// This returns nil if n is a non-negative number.
67func ParseError(n int) error {
68 if n >= 0 {
69 return nil
70 }
71 switch n {
72 case errCodeTruncated:
73 return io.ErrUnexpectedEOF
74 case errCodeFieldNumber:
75 return errFieldNumber
76 case errCodeOverflow:
77 return errOverflow
78 case errCodeReserved:
79 return errReserved
80 case errCodeEndGroup:
81 return errEndGroup
82 default:
83 return errParse
84 }
85}
86
87// ConsumeField parses an entire field record (both tag and value) and returns
88// the field number, the wire type, and the total length.
89// This returns a negative length upon an error (see ParseError).
90//
91// The total length includes the tag header and the end group marker (if the
92// field is a group).
93func ConsumeField(b []byte) (Number, Type, int) {
94 num, typ, n := ConsumeTag(b)
95 if n < 0 {
96 return 0, 0, n // forward error code
97 }
98 m := ConsumeFieldValue(num, typ, b[n:])
99 if m < 0 {
100 return 0, 0, m // forward error code
101 }
102 return num, typ, n + m
103}
104
105// ConsumeFieldValue parses a field value and returns its length.
106// This assumes that the field Number and wire Type have already been parsed.
107// This returns a negative length upon an error (see ParseError).
108//
109// When parsing a group, the length includes the end group marker and
110// the end group is verified to match the starting field number.
111func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) {
112 switch typ {
113 case VarintType:
114 _, n = ConsumeVarint(b)
115 return n
116 case Fixed32Type:
117 _, n = ConsumeFixed32(b)
118 return n
119 case Fixed64Type:
120 _, n = ConsumeFixed64(b)
121 return n
122 case BytesType:
123 _, n = ConsumeBytes(b)
124 return n
125 case StartGroupType:
126 n0 := len(b)
127 for {
128 num2, typ2, n := ConsumeTag(b)
129 if n < 0 {
130 return n // forward error code
131 }
132 b = b[n:]
133 if typ2 == EndGroupType {
134 if num != num2 {
135 return errCodeEndGroup
136 }
137 return n0 - len(b)
138 }
139
140 n = ConsumeFieldValue(num2, typ2, b)
141 if n < 0 {
142 return n // forward error code
143 }
144 b = b[n:]
145 }
146 case EndGroupType:
147 return errCodeEndGroup
148 default:
149 return errCodeReserved
150 }
151}
152
153// AppendTag encodes num and typ as a varint-encoded tag and appends it to b.
154func AppendTag(b []byte, num Number, typ Type) []byte {
155 return AppendVarint(b, EncodeTag(num, typ))
156}
157
158// ConsumeTag parses b as a varint-encoded tag, reporting its length.
159// This returns a negative length upon an error (see ParseError).
160func ConsumeTag(b []byte) (Number, Type, int) {
161 v, n := ConsumeVarint(b)
162 if n < 0 {
163 return 0, 0, n // forward error code
164 }
165 num, typ := DecodeTag(v)
166 if num < MinValidNumber {
167 return 0, 0, errCodeFieldNumber
168 }
169 return num, typ, n
170}
171
172func SizeTag(num Number) int {
173 return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size
174}
175
// AppendVarint appends v to b as a varint-encoded uint64.
//
// The encoding emits v in groups of 7 bits, least significant group first.
// Every byte except the last has its high bit set. The cases below are
// unrolled by encoded length (1 through 10 bytes).
func AppendVarint(b []byte, v uint64) []byte {
	// TODO: Specialize for sizes 1 and 2 with mid-stack inlining.
	switch {
	case v < 1<<7:
		return append(b, byte(v))
	case v < 1<<14:
		return append(b,
			byte(v)|0x80,
			byte(v>>7))
	case v < 1<<21:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14))
	case v < 1<<28:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14)|0x80,
			byte(v>>21))
	case v < 1<<35:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14)|0x80,
			byte(v>>21)|0x80,
			byte(v>>28))
	case v < 1<<42:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14)|0x80,
			byte(v>>21)|0x80,
			byte(v>>28)|0x80,
			byte(v>>35))
	case v < 1<<49:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14)|0x80,
			byte(v>>21)|0x80,
			byte(v>>28)|0x80,
			byte(v>>35)|0x80,
			byte(v>>42))
	case v < 1<<56:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14)|0x80,
			byte(v>>21)|0x80,
			byte(v>>28)|0x80,
			byte(v>>35)|0x80,
			byte(v>>42)|0x80,
			byte(v>>49))
	case v < 1<<63:
		return append(b,
			byte(v)|0x80,
			byte(v>>7)|0x80,
			byte(v>>14)|0x80,
			byte(v>>21)|0x80,
			byte(v>>28)|0x80,
			byte(v>>35)|0x80,
			byte(v>>42)|0x80,
			byte(v>>49)|0x80,
			byte(v>>56))
	}
	// Bit 63 is set, so the tenth and final byte is always exactly 1.
	return append(b,
		byte(v)|0x80,
		byte(v>>7)|0x80,
		byte(v>>14)|0x80,
		byte(v>>21)|0x80,
		byte(v>>28)|0x80,
		byte(v>>35)|0x80,
		byte(v>>42)|0x80,
		byte(v>>49)|0x80,
		byte(v>>56)|0x80,
		1)
}
257
258// ConsumeVarint parses b as a varint-encoded uint64, reporting its length.
259// This returns a negative length upon an error (see ParseError).
260func ConsumeVarint(b []byte) (v uint64, n int) {
261 // TODO: Specialize for sizes 1 and 2 with mid-stack inlining.
262 var y uint64
263 if len(b) <= 0 {
264 return 0, errCodeTruncated
265 }
266 v = uint64(b[0])
267 if v < 0x80 {
268 return v, 1
269 }
270 v -= 0x80
271
272 if len(b) <= 1 {
273 return 0, errCodeTruncated
274 }
275 y = uint64(b[1])
276 v += y << 7
277 if y < 0x80 {
278 return v, 2
279 }
280 v -= 0x80 << 7
281
282 if len(b) <= 2 {
283 return 0, errCodeTruncated
284 }
285 y = uint64(b[2])
286 v += y << 14
287 if y < 0x80 {
288 return v, 3
289 }
290 v -= 0x80 << 14
291
292 if len(b) <= 3 {
293 return 0, errCodeTruncated
294 }
295 y = uint64(b[3])
296 v += y << 21
297 if y < 0x80 {
298 return v, 4
299 }
300 v -= 0x80 << 21
301
302 if len(b) <= 4 {
303 return 0, errCodeTruncated
304 }
305 y = uint64(b[4])
306 v += y << 28
307 if y < 0x80 {
308 return v, 5
309 }
310 v -= 0x80 << 28
311
312 if len(b) <= 5 {
313 return 0, errCodeTruncated
314 }
315 y = uint64(b[5])
316 v += y << 35
317 if y < 0x80 {
318 return v, 6
319 }
320 v -= 0x80 << 35
321
322 if len(b) <= 6 {
323 return 0, errCodeTruncated
324 }
325 y = uint64(b[6])
326 v += y << 42
327 if y < 0x80 {
328 return v, 7
329 }
330 v -= 0x80 << 42
331
332 if len(b) <= 7 {
333 return 0, errCodeTruncated
334 }
335 y = uint64(b[7])
336 v += y << 49
337 if y < 0x80 {
338 return v, 8
339 }
340 v -= 0x80 << 49
341
342 if len(b) <= 8 {
343 return 0, errCodeTruncated
344 }
345 y = uint64(b[8])
346 v += y << 56
347 if y < 0x80 {
348 return v, 9
349 }
350 v -= 0x80 << 56
351
352 if len(b) <= 9 {
353 return 0, errCodeTruncated
354 }
355 y = uint64(b[9])
356 v += y << 63
357 if y < 2 {
358 return v, 10
359 }
360 return 0, errCodeOverflow
361}
362
// SizeVarint returns the encoded size of a varint.
// The size is guaranteed to be within 1 and 10, inclusive.
func SizeVarint(v uint64) int {
	// Zero still takes one byte; or-ing in the low bit makes it count as a
	// one-bit value without changing the bit length of any other input.
	nbits := bits.Len64(v | 1)
	// One byte per started group of 7 bits.
	return (nbits + 6) / 7
}
368
// AppendFixed32 appends v to b as a little-endian uint32,
// that is, four bytes with the least significant byte first.
func AppendFixed32(b []byte, v uint32) []byte {
	b0, b1, b2, b3 := byte(v), byte(v>>8), byte(v>>16), byte(v>>24)
	return append(b, b0, b1, b2, b3)
}
377
378// ConsumeFixed32 parses b as a little-endian uint32, reporting its length.
379// This returns a negative length upon an error (see ParseError).
380func ConsumeFixed32(b []byte) (v uint32, n int) {
381 if len(b) < 4 {
382 return 0, errCodeTruncated
383 }
384 v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
385 return v, 4
386}
387
// SizeFixed32 returns the encoded size of a fixed32; which is always 4.
func SizeFixed32() int {
	return 32 / 8 // 32 bits, 8 bits per byte
}
392
// AppendFixed64 appends v to b as a little-endian uint64,
// that is, eight bytes with the least significant byte first.
func AppendFixed64(b []byte, v uint64) []byte {
	lo, hi := uint32(v), uint32(v>>32)
	return append(b,
		byte(lo), byte(lo>>8), byte(lo>>16), byte(lo>>24),
		byte(hi), byte(hi>>8), byte(hi>>16), byte(hi>>24))
}
405
406// ConsumeFixed64 parses b as a little-endian uint64, reporting its length.
407// This returns a negative length upon an error (see ParseError).
408func ConsumeFixed64(b []byte) (v uint64, n int) {
409 if len(b) < 8 {
410 return 0, errCodeTruncated
411 }
412 v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
413 return v, 8
414}
415
// SizeFixed64 returns the encoded size of a fixed64; which is always 8.
func SizeFixed64() int {
	return 64 / 8 // 64 bits, 8 bits per byte
}
420
421// AppendBytes appends v to b as a length-prefixed bytes value.
422func AppendBytes(b []byte, v []byte) []byte {
423 return append(AppendVarint(b, uint64(len(v))), v...)
424}
425
426// ConsumeBytes parses b as a length-prefixed bytes value, reporting its length.
427// This returns a negative length upon an error (see ParseError).
428func ConsumeBytes(b []byte) (v []byte, n int) {
429 m, n := ConsumeVarint(b)
430 if n < 0 {
431 return nil, n // forward error code
432 }
433 if m > uint64(len(b[n:])) {
434 return nil, errCodeTruncated
435 }
436 return b[n:][:m], n + int(m)
437}
438
439// SizeBytes returns the encoded size of a length-prefixed bytes value,
440// given only the length.
441func SizeBytes(n int) int {
442 return SizeVarint(uint64(n)) + n
443}
444
445// AppendGroup appends v to b as group value, with a trailing end group marker.
446// The value v must not contain the end marker.
447func AppendGroup(b []byte, num Number, v []byte) []byte {
448 return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType))
449}
450
451// ConsumeGroup parses b as a group value until the trailing end group marker,
452// and verifies that the end marker matches the provided num. The value v
453// does not contain the end marker, while the length does contain the end marker.
454// This returns a negative length upon an error (see ParseError).
455func ConsumeGroup(num Number, b []byte) (v []byte, n int) {
456 n = ConsumeFieldValue(num, StartGroupType, b)
457 if n < 0 {
458 return nil, n // forward error code
459 }
460 b = b[:n]
461
462 // Truncate off end group marker, but need to handle denormalized varints.
463 // Assuming end marker is never 0 (which is always the case since
464 // EndGroupType is non-zero), we can truncate all trailing bytes where the
465 // lower 7 bits are all zero (implying that the varint is denormalized).
466 for len(b) > 0 && b[len(b)-1]&0x7f == 0 {
467 b = b[:len(b)-1]
468 }
469 b = b[:len(b)-SizeTag(num)]
470 return b, n
471}
472
473// SizeGroup returns the encoded size of a group, given only the length.
474func SizeGroup(num Number, n int) int {
475 return n + SizeTag(num)
476}
477
478// DecodeTag decodes the field Number and wire Type from its unified form.
479// The Number is -1 if the decoded field number overflows.
480// Other than overflow, this does not check for field number validity.
481func DecodeTag(x uint64) (Number, Type) {
482 num := Number(x >> 3)
483 if num > MaxValidNumber {
484 num = -1
485 }
486 return num, Type(x & 7)
487}
488
489// EncodeTag encodes the field Number and wire Type into its unified form.
490func EncodeTag(num Number, typ Type) uint64 {
491 return uint64(num)<<3 | uint64(typ&7)
492}
493
// DecodeZigZag decodes a zig-zag-encoded uint64 as an int64.
//
//	Input:  {…,  5,  3,  1,  0,  2,  4,  6, …}
//	Output: {…, -3, -2, -1,  0, +1, +2, +3, …}
func DecodeZigZag(x uint64) int64 {
	// The low bit carries the sign: 0 when it is clear, all ones when set.
	sign := -int64(x & 1)
	return int64(x>>1) ^ sign
}
500
// EncodeZigZag encodes an int64 as a zig-zag-encoded uint64.
//
//	Input:  {…, -3, -2, -1,  0, +1, +2, +3, …}
//	Output: {…,  5,  3,  1,  0,  2,  4,  6, …}
func EncodeZigZag(x int64) uint64 {
	u := uint64(x) << 1
	if x < 0 {
		// Negative inputs map onto the odd outputs.
		u = ^u
	}
	return u
}
507
// DecodeBool decodes a uint64 as a bool.
// Zero decodes as false; every other value decodes as true.
//
//	Input:  {    0,    1,    2, …}
//	Output: {false, true, true, …}
func DecodeBool(x uint64) bool {
	return x > 0
}
514
// EncodeBool encodes a bool as a uint64.
//
//	Input:  {false, true}
//	Output: {    0,    1}
func EncodeBool(x bool) uint64 {
	var v uint64
	if x {
		v = 1
	}
	return v
}
523}