blob: 224ddf737a4e64dd0cb21a815a597cf7acddb015 [file] [log] [blame]
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package wire parses and formats the protobuf wire encoding.
//
// See https://developers.google.com/protocol-buffers/docs/encoding.
8package wire
9
10import (
Joe Tsai22505a42018-08-01 13:12:49 -070011 "io"
Joe Tsai972d8732019-05-07 14:10:25 -070012 "math"
Joe Tsai22505a42018-08-01 13:12:49 -070013 "math/bits"
Joe Tsai0e6baaa2018-08-03 17:55:44 -070014
Damien Neile89e6242019-05-13 23:55:40 -070015 "google.golang.org/protobuf/internal/errors"
16 "google.golang.org/protobuf/internal/flags"
Joe Tsai22505a42018-08-01 13:12:49 -070017)
18
// Number is a protobuf field number.
type Number int32

// Bounds of the field number space consulted by IsValid.
const (
	// MinValidNumber is the smallest field number accepted by IsValid.
	MinValidNumber Number = 1

	// FirstReservedNumber is the low end (inclusive) of the reserved range.
	FirstReservedNumber Number = 19000

	// LastReservedNumber is the high end (inclusive) of the reserved range.
	LastReservedNumber Number = 19999

	// MaxValidNumber is the largest field number accepted by IsValid.
	MaxValidNumber Number = 1<<29 - 1
)

// IsValid reports whether n is a semantically valid field number: it must lie
// within [MinValidNumber, MaxValidNumber] and outside the reserved range
// [FirstReservedNumber, LastReservedNumber].
//
// Reserved numbers are only semantically invalid; they remain syntactically
// valid in the wire format, and implementations may treat records that carry
// them as unknown.
func (n Number) IsValid() bool {
	switch {
	case n < MinValidNumber, n > MaxValidNumber:
		return false
	case FirstReservedNumber <= n && n <= LastReservedNumber:
		return false
	default:
		return true
	}
}
37
// Type is the wire type of a field; EncodeTag and DecodeTag store it in the
// low three bits of a tag.
type Type int8

// Wire types, listed in ascending numeric order.
const (
	VarintType     Type = 0
	Fixed64Type    Type = 1
	BytesType      Type = 2
	StartGroupType Type = 3
	EndGroupType   Type = 4
	Fixed32Type    Type = 5
)
49
// Negative error codes returned in place of a length by the Consume functions.
// They count down from -1; ParseError maps them to error values.
const (
	errCodeTruncated = -(iota + 1)
	errCodeFieldNumber
	errCodeOverflow
	errCodeReserved
	errCodeEndGroup
)
58
59var (
60 errFieldNumber = errors.New("invalid field number")
61 errOverflow = errors.New("variable length integer overflow")
62 errReserved = errors.New("cannot parse reserved wire type")
63 errEndGroup = errors.New("mismatching end group marker")
64 errParse = errors.New("parse error")
65)
66
67// ParseError converts an error code into an error value.
68// This returns nil if n is a non-negative number.
69func ParseError(n int) error {
70 if n >= 0 {
71 return nil
72 }
73 switch n {
74 case errCodeTruncated:
75 return io.ErrUnexpectedEOF
76 case errCodeFieldNumber:
77 return errFieldNumber
78 case errCodeOverflow:
79 return errOverflow
80 case errCodeReserved:
81 return errReserved
82 case errCodeEndGroup:
83 return errEndGroup
84 default:
85 return errParse
86 }
87}
88
89// ConsumeField parses an entire field record (both tag and value) and returns
90// the field number, the wire type, and the total length.
91// This returns a negative length upon an error (see ParseError).
92//
93// The total length includes the tag header and the end group marker (if the
94// field is a group).
95func ConsumeField(b []byte) (Number, Type, int) {
96 num, typ, n := ConsumeTag(b)
97 if n < 0 {
98 return 0, 0, n // forward error code
99 }
100 m := ConsumeFieldValue(num, typ, b[n:])
101 if m < 0 {
102 return 0, 0, m // forward error code
103 }
104 return num, typ, n + m
105}
106
107// ConsumeFieldValue parses a field value and returns its length.
108// This assumes that the field Number and wire Type have already been parsed.
109// This returns a negative length upon an error (see ParseError).
110//
111// When parsing a group, the length includes the end group marker and
112// the end group is verified to match the starting field number.
113func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) {
114 switch typ {
115 case VarintType:
116 _, n = ConsumeVarint(b)
117 return n
118 case Fixed32Type:
119 _, n = ConsumeFixed32(b)
120 return n
121 case Fixed64Type:
122 _, n = ConsumeFixed64(b)
123 return n
124 case BytesType:
125 _, n = ConsumeBytes(b)
126 return n
127 case StartGroupType:
128 n0 := len(b)
129 for {
130 num2, typ2, n := ConsumeTag(b)
131 if n < 0 {
132 return n // forward error code
133 }
134 b = b[n:]
135 if typ2 == EndGroupType {
136 if num != num2 {
137 return errCodeEndGroup
138 }
139 return n0 - len(b)
140 }
141
142 n = ConsumeFieldValue(num2, typ2, b)
143 if n < 0 {
144 return n // forward error code
145 }
146 b = b[n:]
147 }
148 case EndGroupType:
149 return errCodeEndGroup
150 default:
151 return errCodeReserved
152 }
153}
154
155// AppendTag encodes num and typ as a varint-encoded tag and appends it to b.
156func AppendTag(b []byte, num Number, typ Type) []byte {
157 return AppendVarint(b, EncodeTag(num, typ))
158}
159
160// ConsumeTag parses b as a varint-encoded tag, reporting its length.
161// This returns a negative length upon an error (see ParseError).
162func ConsumeTag(b []byte) (Number, Type, int) {
163 v, n := ConsumeVarint(b)
164 if n < 0 {
165 return 0, 0, n // forward error code
166 }
167 num, typ := DecodeTag(v)
168 if num < MinValidNumber {
169 return 0, 0, errCodeFieldNumber
170 }
171 return num, typ, n
172}
173
174func SizeTag(num Number) int {
175 return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size
176}
177
// AppendVarint appends v to b in varint form and returns the extended slice.
//
// The value is written seven bits at a time, least significant group first.
// Every byte except the last has its high bit set, so the encoding is between
// 1 and 10 bytes long.
func AppendVarint(b []byte, v uint64) []byte {
	// Ten bytes is the longest encoding: nine groups of seven bits plus one
	// final byte holding bit 63.
	var scratch [10]byte
	last := 0
	for ; v >= 0x80; v >>= 7 {
		scratch[last] = byte(v) | 0x80
		last++
	}
	scratch[last] = byte(v)
	// Append in a single call so the destination grows at most once.
	return append(b, scratch[:last+1]...)
}
259
260// ConsumeVarint parses b as a varint-encoded uint64, reporting its length.
261// This returns a negative length upon an error (see ParseError).
262func ConsumeVarint(b []byte) (v uint64, n int) {
263 // TODO: Specialize for sizes 1 and 2 with mid-stack inlining.
264 var y uint64
265 if len(b) <= 0 {
266 return 0, errCodeTruncated
267 }
268 v = uint64(b[0])
269 if v < 0x80 {
270 return v, 1
271 }
272 v -= 0x80
273
274 if len(b) <= 1 {
275 return 0, errCodeTruncated
276 }
277 y = uint64(b[1])
278 v += y << 7
279 if y < 0x80 {
280 return v, 2
281 }
282 v -= 0x80 << 7
283
284 if len(b) <= 2 {
285 return 0, errCodeTruncated
286 }
287 y = uint64(b[2])
288 v += y << 14
289 if y < 0x80 {
290 return v, 3
291 }
292 v -= 0x80 << 14
293
294 if len(b) <= 3 {
295 return 0, errCodeTruncated
296 }
297 y = uint64(b[3])
298 v += y << 21
299 if y < 0x80 {
300 return v, 4
301 }
302 v -= 0x80 << 21
303
304 if len(b) <= 4 {
305 return 0, errCodeTruncated
306 }
307 y = uint64(b[4])
308 v += y << 28
309 if y < 0x80 {
310 return v, 5
311 }
312 v -= 0x80 << 28
313
314 if len(b) <= 5 {
315 return 0, errCodeTruncated
316 }
317 y = uint64(b[5])
318 v += y << 35
319 if y < 0x80 {
320 return v, 6
321 }
322 v -= 0x80 << 35
323
324 if len(b) <= 6 {
325 return 0, errCodeTruncated
326 }
327 y = uint64(b[6])
328 v += y << 42
329 if y < 0x80 {
330 return v, 7
331 }
332 v -= 0x80 << 42
333
334 if len(b) <= 7 {
335 return 0, errCodeTruncated
336 }
337 y = uint64(b[7])
338 v += y << 49
339 if y < 0x80 {
340 return v, 8
341 }
342 v -= 0x80 << 49
343
344 if len(b) <= 8 {
345 return 0, errCodeTruncated
346 }
347 y = uint64(b[8])
348 v += y << 56
349 if y < 0x80 {
350 return v, 9
351 }
352 v -= 0x80 << 56
353
354 if len(b) <= 9 {
355 return 0, errCodeTruncated
356 }
357 y = uint64(b[9])
358 v += y << 63
359 if y < 2 {
360 return v, 10
361 }
362 return 0, errCodeOverflow
363}
364
// SizeVarint returns the number of bytes needed to encode v as a varint.
// The result is always between 1 and 10, inclusive.
func SizeVarint(v uint64) int {
	// OR-ing in the low bit makes zero count as one significant bit without
	// changing the bit length of any other value.
	significant := bits.Len64(v | 1)
	// Each encoded byte carries seven bits; round up.
	return (significant + 6) / 7
}
370
// AppendFixed32 appends the four bytes of v to b, least significant byte
// first, and returns the extended slice.
func AppendFixed32(b []byte, v uint32) []byte {
	le := [4]byte{
		byte(v),
		byte(v >> 8),
		byte(v >> 16),
		byte(v >> 24),
	}
	return append(b, le[:]...)
}
379
380// ConsumeFixed32 parses b as a little-endian uint32, reporting its length.
381// This returns a negative length upon an error (see ParseError).
382func ConsumeFixed32(b []byte) (v uint32, n int) {
383 if len(b) < 4 {
384 return 0, errCodeTruncated
385 }
386 v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
387 return v, 4
388}
389
// SizeFixed32 returns the encoded size of a fixed32 value, which is 4 bytes
// regardless of the value.
func SizeFixed32() int {
	const size = 4
	return size
}
394
// AppendFixed64 appends the eight bytes of v to b, least significant byte
// first, and returns the extended slice.
func AppendFixed64(b []byte, v uint64) []byte {
	var le [8]byte
	for i := range le {
		le[i] = byte(v >> (8 * uint(i)))
	}
	// Append in a single call so the destination grows at most once.
	return append(b, le[:]...)
}
407
408// ConsumeFixed64 parses b as a little-endian uint64, reporting its length.
409// This returns a negative length upon an error (see ParseError).
410func ConsumeFixed64(b []byte) (v uint64, n int) {
411 if len(b) < 8 {
412 return 0, errCodeTruncated
413 }
414 v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
415 return v, 8
416}
417
// SizeFixed64 returns the encoded size of a fixed64 value, which is 8 bytes
// regardless of the value.
func SizeFixed64() int {
	const size = 8
	return size
}
422
423// AppendBytes appends v to b as a length-prefixed bytes value.
424func AppendBytes(b []byte, v []byte) []byte {
425 return append(AppendVarint(b, uint64(len(v))), v...)
426}
427
428// ConsumeBytes parses b as a length-prefixed bytes value, reporting its length.
429// This returns a negative length upon an error (see ParseError).
430func ConsumeBytes(b []byte) (v []byte, n int) {
431 m, n := ConsumeVarint(b)
432 if n < 0 {
433 return nil, n // forward error code
434 }
435 if m > uint64(len(b[n:])) {
436 return nil, errCodeTruncated
437 }
438 return b[n:][:m], n + int(m)
439}
440
441// SizeBytes returns the encoded size of a length-prefixed bytes value,
442// given only the length.
443func SizeBytes(n int) int {
444 return SizeVarint(uint64(n)) + n
445}
446
Damien Neile91877d2019-06-27 10:54:42 -0700447// AppendString appends v to b as a length-prefixed bytes value.
448func AppendString(b []byte, v string) []byte {
449 return append(AppendVarint(b, uint64(len(v))), v...)
450}
451
452// ConsumeString parses b as a length-prefixed bytes value, reporting its length.
453// This returns a negative length upon an error (see ParseError).
454func ConsumeString(b []byte) (v string, n int) {
455 bb, n := ConsumeBytes(b)
456 return string(bb), n
457}
458
Joe Tsai22505a42018-08-01 13:12:49 -0700459// AppendGroup appends v to b as group value, with a trailing end group marker.
460// The value v must not contain the end marker.
461func AppendGroup(b []byte, num Number, v []byte) []byte {
462 return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType))
463}
464
465// ConsumeGroup parses b as a group value until the trailing end group marker,
466// and verifies that the end marker matches the provided num. The value v
467// does not contain the end marker, while the length does contain the end marker.
468// This returns a negative length upon an error (see ParseError).
469func ConsumeGroup(num Number, b []byte) (v []byte, n int) {
470 n = ConsumeFieldValue(num, StartGroupType, b)
471 if n < 0 {
472 return nil, n // forward error code
473 }
474 b = b[:n]
475
476 // Truncate off end group marker, but need to handle denormalized varints.
477 // Assuming end marker is never 0 (which is always the case since
478 // EndGroupType is non-zero), we can truncate all trailing bytes where the
479 // lower 7 bits are all zero (implying that the varint is denormalized).
480 for len(b) > 0 && b[len(b)-1]&0x7f == 0 {
481 b = b[:len(b)-1]
482 }
483 b = b[:len(b)-SizeTag(num)]
484 return b, n
485}
486
487// SizeGroup returns the encoded size of a group, given only the length.
488func SizeGroup(num Number, n int) int {
489 return n + SizeTag(num)
490}
491
492// DecodeTag decodes the field Number and wire Type from its unified form.
493// The Number is -1 if the decoded field number overflows.
494// Other than overflow, this does not check for field number validity.
495func DecodeTag(x uint64) (Number, Type) {
Joe Tsai972d8732019-05-07 14:10:25 -0700496 // NOTE: MessageSet allows for larger field numbers than normal.
497 if flags.Proto1Legacy {
498 if x>>3 > uint64(math.MaxInt32) {
499 return -1, 0
500 }
501 } else {
502 if x>>3 > uint64(MaxValidNumber) {
503 return -1, 0
504 }
Joe Tsai22505a42018-08-01 13:12:49 -0700505 }
Joe Tsai972d8732019-05-07 14:10:25 -0700506 return Number(x >> 3), Type(x & 7)
Joe Tsai22505a42018-08-01 13:12:49 -0700507}
508
509// EncodeTag encodes the field Number and wire Type into its unified form.
510func EncodeTag(num Number, typ Type) uint64 {
511 return uint64(num)<<3 | uint64(typ&7)
512}
513
// DecodeZigZag converts a zig-zag-encoded uint64 back into the int64 it
// represents.
//	Input:  {…,  5,  3,  1,  0,  2,  4,  6, …}
//	Output: {…, -3, -2, -1,  0, +1, +2, +3, …}
func DecodeZigZag(x uint64) int64 {
	magnitude := int64(x >> 1)
	// The low bit is the sign: negating it gives 0 for even inputs and -1
	// (all bits set) for odd inputs, which flips every bit of the magnitude.
	signMask := -int64(x & 1)
	return magnitude ^ signMask
}
520
// EncodeZigZag converts an int64 into its zig-zag encoding, which maps
// values of small magnitude, positive or negative, to small unsigned values.
//	Input:  {…, -3, -2, -1,  0, +1, +2, +3, …}
//	Output: {…,  5,  3,  1,  0,  2,  4,  6, …}
func EncodeZigZag(x int64) uint64 {
	encoded := uint64(x) << 1
	if x < 0 {
		// Negative inputs have every bit flipped, landing on the odd values.
		encoded = ^encoded
	}
	return encoded
}
527
// DecodeBool converts a uint64 into a bool: zero is false and every other
// value is true.
//	Input:  {    0,    1,    2, …}
//	Output: {false, true, true, …}
func DecodeBool(x uint64) bool {
	// x is unsigned, so "greater than zero" is the same as "non-zero".
	return x > 0
}
534
// EncodeBool converts a bool into a uint64: false becomes 0 and true
// becomes 1.
//	Input:  {false, true}
//	Output: {    0,    1}
func EncodeBool(x bool) uint64 {
	var encoded uint64
	if x {
		encoded = 1
	}
	return encoded
}
543}