blob: 32622b11fe63784cf5b0c569e55a60c113d7a0a7 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package wire parses and formats the protobuf wire encoding.
//
// See https://developers.google.com/protocol-buffers/docs/encoding.
8package wire
9
10import (
Joe Tsai22505a42018-08-01 13:12:49 -070011 "io"
Joe Tsai972d8732019-05-07 14:10:25 -070012 "math"
Joe Tsai22505a42018-08-01 13:12:49 -070013 "math/bits"
Joe Tsai0e6baaa2018-08-03 17:55:44 -070014
Damien Neile89e6242019-05-13 23:55:40 -070015 "google.golang.org/protobuf/internal/errors"
Joe Tsai22505a42018-08-01 13:12:49 -070016)
17
// Number is the numeric identifier of a field as carried in a wire tag.
type Number int32

// Boundaries of the field number space. Numbers in the inclusive range
// [FirstReservedNumber, LastReservedNumber] are reserved.
const (
	MinValidNumber      = Number(1)
	FirstReservedNumber = Number(19000)
	LastReservedNumber  = Number(19999)
	MaxValidNumber      = Number((1 << 29) - 1)
)
27
28// IsValid reports whether the field number is semantically valid.
29//
30// Note that while numbers within the reserved range are semantically invalid,
31// they are syntactically valid in the wire format.
32// Implementations may treat records with reserved field numbers as unknown.
33func (n Number) IsValid() bool {
34 return MinValidNumber <= n && n < FirstReservedNumber || LastReservedNumber < n && n <= MaxValidNumber
35}
36
// Type is the wire type stored in the low three bits of a tag.
type Type int8

// Wire types, listed in numeric order.
const (
	VarintType     = Type(0) // variable-length integer
	Fixed64Type    = Type(1) // 8-byte little-endian value
	BytesType      = Type(2) // length-prefixed bytes
	StartGroupType = Type(3) // start of a group
	EndGroupType   = Type(4) // end of a group
	Fixed32Type    = Type(5) // 4-byte little-endian value
)
48
// Negative error codes returned in place of a length by the Consume
// functions. The codes count down from -1 in declaration order; ParseError
// converts them into error values.
const (
	errCodeTruncated = -(iota + 1)
	errCodeFieldNumber
	errCodeOverflow
	errCodeReserved
	errCodeEndGroup
)
57
58var (
59 errFieldNumber = errors.New("invalid field number")
60 errOverflow = errors.New("variable length integer overflow")
61 errReserved = errors.New("cannot parse reserved wire type")
62 errEndGroup = errors.New("mismatching end group marker")
63 errParse = errors.New("parse error")
64)
65
66// ParseError converts an error code into an error value.
67// This returns nil if n is a non-negative number.
68func ParseError(n int) error {
69 if n >= 0 {
70 return nil
71 }
72 switch n {
73 case errCodeTruncated:
74 return io.ErrUnexpectedEOF
75 case errCodeFieldNumber:
76 return errFieldNumber
77 case errCodeOverflow:
78 return errOverflow
79 case errCodeReserved:
80 return errReserved
81 case errCodeEndGroup:
82 return errEndGroup
83 default:
84 return errParse
85 }
86}
87
88// ConsumeField parses an entire field record (both tag and value) and returns
89// the field number, the wire type, and the total length.
90// This returns a negative length upon an error (see ParseError).
91//
92// The total length includes the tag header and the end group marker (if the
93// field is a group).
94func ConsumeField(b []byte) (Number, Type, int) {
95 num, typ, n := ConsumeTag(b)
96 if n < 0 {
97 return 0, 0, n // forward error code
98 }
99 m := ConsumeFieldValue(num, typ, b[n:])
100 if m < 0 {
101 return 0, 0, m // forward error code
102 }
103 return num, typ, n + m
104}
105
106// ConsumeFieldValue parses a field value and returns its length.
107// This assumes that the field Number and wire Type have already been parsed.
108// This returns a negative length upon an error (see ParseError).
109//
110// When parsing a group, the length includes the end group marker and
111// the end group is verified to match the starting field number.
112func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) {
113 switch typ {
114 case VarintType:
115 _, n = ConsumeVarint(b)
116 return n
117 case Fixed32Type:
118 _, n = ConsumeFixed32(b)
119 return n
120 case Fixed64Type:
121 _, n = ConsumeFixed64(b)
122 return n
123 case BytesType:
124 _, n = ConsumeBytes(b)
125 return n
126 case StartGroupType:
127 n0 := len(b)
128 for {
129 num2, typ2, n := ConsumeTag(b)
130 if n < 0 {
131 return n // forward error code
132 }
133 b = b[n:]
134 if typ2 == EndGroupType {
135 if num != num2 {
136 return errCodeEndGroup
137 }
138 return n0 - len(b)
139 }
140
141 n = ConsumeFieldValue(num2, typ2, b)
142 if n < 0 {
143 return n // forward error code
144 }
145 b = b[n:]
146 }
147 case EndGroupType:
148 return errCodeEndGroup
149 default:
150 return errCodeReserved
151 }
152}
153
154// AppendTag encodes num and typ as a varint-encoded tag and appends it to b.
155func AppendTag(b []byte, num Number, typ Type) []byte {
156 return AppendVarint(b, EncodeTag(num, typ))
157}
158
159// ConsumeTag parses b as a varint-encoded tag, reporting its length.
160// This returns a negative length upon an error (see ParseError).
161func ConsumeTag(b []byte) (Number, Type, int) {
162 v, n := ConsumeVarint(b)
163 if n < 0 {
164 return 0, 0, n // forward error code
165 }
166 num, typ := DecodeTag(v)
167 if num < MinValidNumber {
168 return 0, 0, errCodeFieldNumber
169 }
170 return num, typ, n
171}
172
173func SizeTag(num Number) int {
174 return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size
175}
176
// AppendVarint appends v to b as a varint-encoded uint64.
//
// The value is emitted seven bits at a time, least-significant group first.
// Every byte except the last has its high bit set.
func AppendVarint(b []byte, v uint64) []byte {
	// A uint64 needs at most ten 7-bit groups.
	var scratch [10]byte
	n := 0
	for v >= 0x80 {
		scratch[n] = byte(v) | 0x80
		v >>= 7
		n++
	}
	scratch[n] = byte(v)
	n++
	// Append all bytes in one call so b grows at most once.
	return append(b, scratch[:n]...)
}
258
259// ConsumeVarint parses b as a varint-encoded uint64, reporting its length.
260// This returns a negative length upon an error (see ParseError).
261func ConsumeVarint(b []byte) (v uint64, n int) {
262 // TODO: Specialize for sizes 1 and 2 with mid-stack inlining.
263 var y uint64
264 if len(b) <= 0 {
265 return 0, errCodeTruncated
266 }
267 v = uint64(b[0])
268 if v < 0x80 {
269 return v, 1
270 }
271 v -= 0x80
272
273 if len(b) <= 1 {
274 return 0, errCodeTruncated
275 }
276 y = uint64(b[1])
277 v += y << 7
278 if y < 0x80 {
279 return v, 2
280 }
281 v -= 0x80 << 7
282
283 if len(b) <= 2 {
284 return 0, errCodeTruncated
285 }
286 y = uint64(b[2])
287 v += y << 14
288 if y < 0x80 {
289 return v, 3
290 }
291 v -= 0x80 << 14
292
293 if len(b) <= 3 {
294 return 0, errCodeTruncated
295 }
296 y = uint64(b[3])
297 v += y << 21
298 if y < 0x80 {
299 return v, 4
300 }
301 v -= 0x80 << 21
302
303 if len(b) <= 4 {
304 return 0, errCodeTruncated
305 }
306 y = uint64(b[4])
307 v += y << 28
308 if y < 0x80 {
309 return v, 5
310 }
311 v -= 0x80 << 28
312
313 if len(b) <= 5 {
314 return 0, errCodeTruncated
315 }
316 y = uint64(b[5])
317 v += y << 35
318 if y < 0x80 {
319 return v, 6
320 }
321 v -= 0x80 << 35
322
323 if len(b) <= 6 {
324 return 0, errCodeTruncated
325 }
326 y = uint64(b[6])
327 v += y << 42
328 if y < 0x80 {
329 return v, 7
330 }
331 v -= 0x80 << 42
332
333 if len(b) <= 7 {
334 return 0, errCodeTruncated
335 }
336 y = uint64(b[7])
337 v += y << 49
338 if y < 0x80 {
339 return v, 8
340 }
341 v -= 0x80 << 49
342
343 if len(b) <= 8 {
344 return 0, errCodeTruncated
345 }
346 y = uint64(b[8])
347 v += y << 56
348 if y < 0x80 {
349 return v, 9
350 }
351 v -= 0x80 << 56
352
353 if len(b) <= 9 {
354 return 0, errCodeTruncated
355 }
356 y = uint64(b[9])
357 v += y << 63
358 if y < 2 {
359 return v, 10
360 }
361 return 0, errCodeOverflow
362}
363
// SizeVarint returns the encoded size of a varint.
// The size is guaranteed to be within 1 and 10, inclusive.
func SizeVarint(v uint64) int {
	width := bits.Len64(v)
	if width == 0 {
		// Zero still occupies one byte.
		return 1
	}
	// One byte per started group of seven significant bits.
	return (width + 6) / 7
}
369
// AppendFixed32 appends v to b as a little-endian uint32,
// least-significant byte first.
func AppendFixed32(b []byte, v uint32) []byte {
	le := [4]byte{
		byte(v),
		byte(v >> 8),
		byte(v >> 16),
		byte(v >> 24),
	}
	return append(b, le[:]...)
}
378
379// ConsumeFixed32 parses b as a little-endian uint32, reporting its length.
380// This returns a negative length upon an error (see ParseError).
381func ConsumeFixed32(b []byte) (v uint32, n int) {
382 if len(b) < 4 {
383 return 0, errCodeTruncated
384 }
385 v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
386 return v, 4
387}
388
// SizeFixed32 returns the encoded size of a fixed32; which is always 4.
func SizeFixed32() int {
	const size = 4
	return size
}
393
// AppendFixed64 appends v to b as a little-endian uint64,
// least-significant byte first.
func AppendFixed64(b []byte, v uint64) []byte {
	le := [8]byte{
		byte(v),
		byte(v >> 8),
		byte(v >> 16),
		byte(v >> 24),
		byte(v >> 32),
		byte(v >> 40),
		byte(v >> 48),
		byte(v >> 56),
	}
	return append(b, le[:]...)
}
406
407// ConsumeFixed64 parses b as a little-endian uint64, reporting its length.
408// This returns a negative length upon an error (see ParseError).
409func ConsumeFixed64(b []byte) (v uint64, n int) {
410 if len(b) < 8 {
411 return 0, errCodeTruncated
412 }
413 v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
414 return v, 8
415}
416
// SizeFixed64 returns the encoded size of a fixed64; which is always 8.
func SizeFixed64() int {
	const size = 8
	return size
}
421
422// AppendBytes appends v to b as a length-prefixed bytes value.
423func AppendBytes(b []byte, v []byte) []byte {
424 return append(AppendVarint(b, uint64(len(v))), v...)
425}
426
427// ConsumeBytes parses b as a length-prefixed bytes value, reporting its length.
428// This returns a negative length upon an error (see ParseError).
429func ConsumeBytes(b []byte) (v []byte, n int) {
430 m, n := ConsumeVarint(b)
431 if n < 0 {
432 return nil, n // forward error code
433 }
434 if m > uint64(len(b[n:])) {
435 return nil, errCodeTruncated
436 }
437 return b[n:][:m], n + int(m)
438}
439
440// SizeBytes returns the encoded size of a length-prefixed bytes value,
441// given only the length.
442func SizeBytes(n int) int {
443 return SizeVarint(uint64(n)) + n
444}
445
Damien Neile91877d2019-06-27 10:54:42 -0700446// AppendString appends v to b as a length-prefixed bytes value.
447func AppendString(b []byte, v string) []byte {
448 return append(AppendVarint(b, uint64(len(v))), v...)
449}
450
451// ConsumeString parses b as a length-prefixed bytes value, reporting its length.
452// This returns a negative length upon an error (see ParseError).
453func ConsumeString(b []byte) (v string, n int) {
454 bb, n := ConsumeBytes(b)
455 return string(bb), n
456}
457
Joe Tsai22505a42018-08-01 13:12:49 -0700458// AppendGroup appends v to b as group value, with a trailing end group marker.
459// The value v must not contain the end marker.
460func AppendGroup(b []byte, num Number, v []byte) []byte {
461 return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType))
462}
463
464// ConsumeGroup parses b as a group value until the trailing end group marker,
465// and verifies that the end marker matches the provided num. The value v
466// does not contain the end marker, while the length does contain the end marker.
467// This returns a negative length upon an error (see ParseError).
468func ConsumeGroup(num Number, b []byte) (v []byte, n int) {
469 n = ConsumeFieldValue(num, StartGroupType, b)
470 if n < 0 {
471 return nil, n // forward error code
472 }
473 b = b[:n]
474
475 // Truncate off end group marker, but need to handle denormalized varints.
476 // Assuming end marker is never 0 (which is always the case since
477 // EndGroupType is non-zero), we can truncate all trailing bytes where the
478 // lower 7 bits are all zero (implying that the varint is denormalized).
479 for len(b) > 0 && b[len(b)-1]&0x7f == 0 {
480 b = b[:len(b)-1]
481 }
482 b = b[:len(b)-SizeTag(num)]
483 return b, n
484}
485
486// SizeGroup returns the encoded size of a group, given only the length.
487func SizeGroup(num Number, n int) int {
488 return n + SizeTag(num)
489}
490
491// DecodeTag decodes the field Number and wire Type from its unified form.
Damien Neilfe15dd42019-12-06 15:36:03 -0800492// The Number is -1 if the decoded field number overflows int32.
Joe Tsai22505a42018-08-01 13:12:49 -0700493// Other than overflow, this does not check for field number validity.
494func DecodeTag(x uint64) (Number, Type) {
Joe Tsai972d8732019-05-07 14:10:25 -0700495 // NOTE: MessageSet allows for larger field numbers than normal.
Damien Neilfe15dd42019-12-06 15:36:03 -0800496 if x>>3 > uint64(math.MaxInt32) {
497 return -1, 0
Joe Tsai22505a42018-08-01 13:12:49 -0700498 }
Joe Tsai972d8732019-05-07 14:10:25 -0700499 return Number(x >> 3), Type(x & 7)
Joe Tsai22505a42018-08-01 13:12:49 -0700500}
501
502// EncodeTag encodes the field Number and wire Type into its unified form.
503func EncodeTag(num Number, typ Type) uint64 {
504 return uint64(num)<<3 | uint64(typ&7)
505}
506
// DecodeZigZag decodes a zig-zag-encoded uint64 as an int64.
//	Input:  {…,  5,  3,  1,  0,  2,  4,  6, …}
//	Output: {…, -3, -2, -1,  0, +1, +2, +3, …}
func DecodeZigZag(x uint64) int64 {
	magnitude := int64(x >> 1)
	// The low bit is the sign: all ones when set, zero otherwise.
	signMask := -int64(x & 1)
	return magnitude ^ signMask
}
513
// EncodeZigZag encodes an int64 as a zig-zag-encoded uint64.
//	Input:  {…, -3, -2, -1,  0, +1, +2, +3, …}
//	Output: {…,  5,  3,  1,  0,  2,  4,  6, …}
func EncodeZigZag(x int64) uint64 {
	doubled := x << 1
	// The arithmetic shift yields all ones for negative x and zero otherwise.
	signMask := x >> 63
	return uint64(doubled ^ signMask)
}
520
// DecodeBool decodes a uint64 as a bool; every non-zero value is true.
//	Input:  {    0,    1,    2, …}
//	Output: {false, true, true, …}
func DecodeBool(x uint64) bool {
	if x == 0 {
		return false
	}
	return true
}
527
// EncodeBool encodes a bool as a uint64.
//	Input:  {false, true}
//	Output: {    0,    1}
func EncodeBool(x bool) uint64 {
	var encoded uint64
	if x {
		encoded = 1
	}
	return encoded
}