blob: ef4624c8439ccfde2eff72fcfd8dad7beda4f052 [file] [log] [blame]
Joe Tsai22505a42018-08-01 13:12:49 -07001// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package wire parses and formats the protobuf wire encoding.
6//
7// See https://developers.google.com/protocol-buffers/docs/encoding.
8package wire
9
10import (
11 "errors" // TODO: replace with our own errors package?
12 "io"
13 "math/bits"
14)
15
16// Number represents the field number.
17type Number int32
18
19const (
20 MinValidNumber Number = 1
21 FirstReservedNumber Number = 19000
22 LastReservedNumber Number = 19999
23 MaxValidNumber Number = 1<<29 - 1
24)
25
26// IsValid reports whether the field number is semantically valid.
27//
28// Note that while numbers within the reserved range are semantically invalid,
29// they are syntactically valid in the wire format.
30// Implementations may treat records with reserved field numbers as unknown.
31func (n Number) IsValid() bool {
32 return MinValidNumber <= n && n < FirstReservedNumber || LastReservedNumber < n && n <= MaxValidNumber
33}
34
35// Type represents the wire type.
36type Type int8
37
38const (
39 VarintType Type = 0
40 Fixed32Type Type = 5
41 Fixed64Type Type = 1
42 BytesType Type = 2
43 StartGroupType Type = 3
44 EndGroupType Type = 4
45)
46
47const (
48 _ = -iota
49 errCodeTruncated
50 errCodeFieldNumber
51 errCodeOverflow
52 errCodeReserved
53 errCodeEndGroup
54)
55
56var (
57 errFieldNumber = errors.New("invalid field number")
58 errOverflow = errors.New("variable length integer overflow")
59 errReserved = errors.New("cannot parse reserved wire type")
60 errEndGroup = errors.New("mismatching end group marker")
61 errParse = errors.New("parse error")
62)
63
64// ParseError converts an error code into an error value.
65// This returns nil if n is a non-negative number.
66func ParseError(n int) error {
67 if n >= 0 {
68 return nil
69 }
70 switch n {
71 case errCodeTruncated:
72 return io.ErrUnexpectedEOF
73 case errCodeFieldNumber:
74 return errFieldNumber
75 case errCodeOverflow:
76 return errOverflow
77 case errCodeReserved:
78 return errReserved
79 case errCodeEndGroup:
80 return errEndGroup
81 default:
82 return errParse
83 }
84}
85
86// ConsumeField parses an entire field record (both tag and value) and returns
87// the field number, the wire type, and the total length.
88// This returns a negative length upon an error (see ParseError).
89//
90// The total length includes the tag header and the end group marker (if the
91// field is a group).
92func ConsumeField(b []byte) (Number, Type, int) {
93 num, typ, n := ConsumeTag(b)
94 if n < 0 {
95 return 0, 0, n // forward error code
96 }
97 m := ConsumeFieldValue(num, typ, b[n:])
98 if m < 0 {
99 return 0, 0, m // forward error code
100 }
101 return num, typ, n + m
102}
103
104// ConsumeFieldValue parses a field value and returns its length.
105// This assumes that the field Number and wire Type have already been parsed.
106// This returns a negative length upon an error (see ParseError).
107//
108// When parsing a group, the length includes the end group marker and
109// the end group is verified to match the starting field number.
110func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) {
111 switch typ {
112 case VarintType:
113 _, n = ConsumeVarint(b)
114 return n
115 case Fixed32Type:
116 _, n = ConsumeFixed32(b)
117 return n
118 case Fixed64Type:
119 _, n = ConsumeFixed64(b)
120 return n
121 case BytesType:
122 _, n = ConsumeBytes(b)
123 return n
124 case StartGroupType:
125 n0 := len(b)
126 for {
127 num2, typ2, n := ConsumeTag(b)
128 if n < 0 {
129 return n // forward error code
130 }
131 b = b[n:]
132 if typ2 == EndGroupType {
133 if num != num2 {
134 return errCodeEndGroup
135 }
136 return n0 - len(b)
137 }
138
139 n = ConsumeFieldValue(num2, typ2, b)
140 if n < 0 {
141 return n // forward error code
142 }
143 b = b[n:]
144 }
145 case EndGroupType:
146 return errCodeEndGroup
147 default:
148 return errCodeReserved
149 }
150}
151
152// AppendTag encodes num and typ as a varint-encoded tag and appends it to b.
153func AppendTag(b []byte, num Number, typ Type) []byte {
154 return AppendVarint(b, EncodeTag(num, typ))
155}
156
157// ConsumeTag parses b as a varint-encoded tag, reporting its length.
158// This returns a negative length upon an error (see ParseError).
159func ConsumeTag(b []byte) (Number, Type, int) {
160 v, n := ConsumeVarint(b)
161 if n < 0 {
162 return 0, 0, n // forward error code
163 }
164 num, typ := DecodeTag(v)
165 if num < MinValidNumber {
166 return 0, 0, errCodeFieldNumber
167 }
168 return num, typ, n
169}
170
171func SizeTag(num Number) int {
172 return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size
173}
174
175// AppendVarint appends v to b as a varint-encoded uint64.
176func AppendVarint(b []byte, v uint64) []byte {
177 // TODO: Specialize for sizes 1 and 2 with mid-stack inlining.
178 switch {
179 case v < 1<<7:
180 b = append(b, byte(v))
181 case v < 1<<14:
182 b = append(b,
183 byte((v>>0)&0x7f|0x80),
184 byte(v>>7))
185 case v < 1<<21:
186 b = append(b,
187 byte((v>>0)&0x7f|0x80),
188 byte((v>>7)&0x7f|0x80),
189 byte(v>>14))
190 case v < 1<<28:
191 b = append(b,
192 byte((v>>0)&0x7f|0x80),
193 byte((v>>7)&0x7f|0x80),
194 byte((v>>14)&0x7f|0x80),
195 byte(v>>21))
196 case v < 1<<35:
197 b = append(b,
198 byte((v>>0)&0x7f|0x80),
199 byte((v>>7)&0x7f|0x80),
200 byte((v>>14)&0x7f|0x80),
201 byte((v>>21)&0x7f|0x80),
202 byte(v>>28))
203 case v < 1<<42:
204 b = append(b,
205 byte((v>>0)&0x7f|0x80),
206 byte((v>>7)&0x7f|0x80),
207 byte((v>>14)&0x7f|0x80),
208 byte((v>>21)&0x7f|0x80),
209 byte((v>>28)&0x7f|0x80),
210 byte(v>>35))
211 case v < 1<<49:
212 b = append(b,
213 byte((v>>0)&0x7f|0x80),
214 byte((v>>7)&0x7f|0x80),
215 byte((v>>14)&0x7f|0x80),
216 byte((v>>21)&0x7f|0x80),
217 byte((v>>28)&0x7f|0x80),
218 byte((v>>35)&0x7f|0x80),
219 byte(v>>42))
220 case v < 1<<56:
221 b = append(b,
222 byte((v>>0)&0x7f|0x80),
223 byte((v>>7)&0x7f|0x80),
224 byte((v>>14)&0x7f|0x80),
225 byte((v>>21)&0x7f|0x80),
226 byte((v>>28)&0x7f|0x80),
227 byte((v>>35)&0x7f|0x80),
228 byte((v>>42)&0x7f|0x80),
229 byte(v>>49))
230 case v < 1<<63:
231 b = append(b,
232 byte((v>>0)&0x7f|0x80),
233 byte((v>>7)&0x7f|0x80),
234 byte((v>>14)&0x7f|0x80),
235 byte((v>>21)&0x7f|0x80),
236 byte((v>>28)&0x7f|0x80),
237 byte((v>>35)&0x7f|0x80),
238 byte((v>>42)&0x7f|0x80),
239 byte((v>>49)&0x7f|0x80),
240 byte(v>>56))
241 default:
242 b = append(b,
243 byte((v>>0)&0x7f|0x80),
244 byte((v>>7)&0x7f|0x80),
245 byte((v>>14)&0x7f|0x80),
246 byte((v>>21)&0x7f|0x80),
247 byte((v>>28)&0x7f|0x80),
248 byte((v>>35)&0x7f|0x80),
249 byte((v>>42)&0x7f|0x80),
250 byte((v>>49)&0x7f|0x80),
251 byte((v>>56)&0x7f|0x80),
252 1)
253 }
254 return b
255}
256
257// ConsumeVarint parses b as a varint-encoded uint64, reporting its length.
258// This returns a negative length upon an error (see ParseError).
259func ConsumeVarint(b []byte) (v uint64, n int) {
260 // TODO: Specialize for sizes 1 and 2 with mid-stack inlining.
261 var y uint64
262 if len(b) <= 0 {
263 return 0, errCodeTruncated
264 }
265 v = uint64(b[0])
266 if v < 0x80 {
267 return v, 1
268 }
269 v -= 0x80
270
271 if len(b) <= 1 {
272 return 0, errCodeTruncated
273 }
274 y = uint64(b[1])
275 v += y << 7
276 if y < 0x80 {
277 return v, 2
278 }
279 v -= 0x80 << 7
280
281 if len(b) <= 2 {
282 return 0, errCodeTruncated
283 }
284 y = uint64(b[2])
285 v += y << 14
286 if y < 0x80 {
287 return v, 3
288 }
289 v -= 0x80 << 14
290
291 if len(b) <= 3 {
292 return 0, errCodeTruncated
293 }
294 y = uint64(b[3])
295 v += y << 21
296 if y < 0x80 {
297 return v, 4
298 }
299 v -= 0x80 << 21
300
301 if len(b) <= 4 {
302 return 0, errCodeTruncated
303 }
304 y = uint64(b[4])
305 v += y << 28
306 if y < 0x80 {
307 return v, 5
308 }
309 v -= 0x80 << 28
310
311 if len(b) <= 5 {
312 return 0, errCodeTruncated
313 }
314 y = uint64(b[5])
315 v += y << 35
316 if y < 0x80 {
317 return v, 6
318 }
319 v -= 0x80 << 35
320
321 if len(b) <= 6 {
322 return 0, errCodeTruncated
323 }
324 y = uint64(b[6])
325 v += y << 42
326 if y < 0x80 {
327 return v, 7
328 }
329 v -= 0x80 << 42
330
331 if len(b) <= 7 {
332 return 0, errCodeTruncated
333 }
334 y = uint64(b[7])
335 v += y << 49
336 if y < 0x80 {
337 return v, 8
338 }
339 v -= 0x80 << 49
340
341 if len(b) <= 8 {
342 return 0, errCodeTruncated
343 }
344 y = uint64(b[8])
345 v += y << 56
346 if y < 0x80 {
347 return v, 9
348 }
349 v -= 0x80 << 56
350
351 if len(b) <= 9 {
352 return 0, errCodeTruncated
353 }
354 y = uint64(b[9])
355 v += y << 63
356 if y < 2 {
357 return v, 10
358 }
359 return 0, errCodeOverflow
360}
361
362// SizeVarint returns the encoded size of a varint.
363// The size is guaranteed to be within 1 and 10, inclusive.
364func SizeVarint(v uint64) int {
365 return 1 + (bits.Len64(v)-1)/7
366}
367
368// AppendFixed32 appends v to b as a little-endian uint32.
369func AppendFixed32(b []byte, v uint32) []byte {
370 return append(b,
371 byte(v>>0),
372 byte(v>>8),
373 byte(v>>16),
374 byte(v>>24))
375}
376
377// ConsumeFixed32 parses b as a little-endian uint32, reporting its length.
378// This returns a negative length upon an error (see ParseError).
379func ConsumeFixed32(b []byte) (v uint32, n int) {
380 if len(b) < 4 {
381 return 0, errCodeTruncated
382 }
383 v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
384 return v, 4
385}
386
387// SizeFixed32 returns the encoded size of a fixed32; which is always 4.
388func SizeFixed32() int {
389 return 4
390}
391
392// AppendFixed64 appends v to b as a little-endian uint64.
393func AppendFixed64(b []byte, v uint64) []byte {
394 return append(b,
395 byte(v>>0),
396 byte(v>>8),
397 byte(v>>16),
398 byte(v>>24),
399 byte(v>>32),
400 byte(v>>40),
401 byte(v>>48),
402 byte(v>>56))
403}
404
405// ConsumeFixed64 parses b as a little-endian uint64, reporting its length.
406// This returns a negative length upon an error (see ParseError).
407func ConsumeFixed64(b []byte) (v uint64, n int) {
408 if len(b) < 8 {
409 return 0, errCodeTruncated
410 }
411 v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
412 return v, 8
413}
414
415// SizeFixed64 returns the encoded size of a fixed64; which is always 8.
416func SizeFixed64() int {
417 return 8
418}
419
420// AppendBytes appends v to b as a length-prefixed bytes value.
421func AppendBytes(b []byte, v []byte) []byte {
422 return append(AppendVarint(b, uint64(len(v))), v...)
423}
424
425// ConsumeBytes parses b as a length-prefixed bytes value, reporting its length.
426// This returns a negative length upon an error (see ParseError).
427func ConsumeBytes(b []byte) (v []byte, n int) {
428 m, n := ConsumeVarint(b)
429 if n < 0 {
430 return nil, n // forward error code
431 }
432 if m > uint64(len(b[n:])) {
433 return nil, errCodeTruncated
434 }
435 return b[n:][:m], n + int(m)
436}
437
438// SizeBytes returns the encoded size of a length-prefixed bytes value,
439// given only the length.
440func SizeBytes(n int) int {
441 return SizeVarint(uint64(n)) + n
442}
443
444// AppendGroup appends v to b as group value, with a trailing end group marker.
445// The value v must not contain the end marker.
446func AppendGroup(b []byte, num Number, v []byte) []byte {
447 return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType))
448}
449
450// ConsumeGroup parses b as a group value until the trailing end group marker,
451// and verifies that the end marker matches the provided num. The value v
452// does not contain the end marker, while the length does contain the end marker.
453// This returns a negative length upon an error (see ParseError).
454func ConsumeGroup(num Number, b []byte) (v []byte, n int) {
455 n = ConsumeFieldValue(num, StartGroupType, b)
456 if n < 0 {
457 return nil, n // forward error code
458 }
459 b = b[:n]
460
461 // Truncate off end group marker, but need to handle denormalized varints.
462 // Assuming end marker is never 0 (which is always the case since
463 // EndGroupType is non-zero), we can truncate all trailing bytes where the
464 // lower 7 bits are all zero (implying that the varint is denormalized).
465 for len(b) > 0 && b[len(b)-1]&0x7f == 0 {
466 b = b[:len(b)-1]
467 }
468 b = b[:len(b)-SizeTag(num)]
469 return b, n
470}
471
472// SizeGroup returns the encoded size of a group, given only the length.
473func SizeGroup(num Number, n int) int {
474 return n + SizeTag(num)
475}
476
477// DecodeTag decodes the field Number and wire Type from its unified form.
478// The Number is -1 if the decoded field number overflows.
479// Other than overflow, this does not check for field number validity.
480func DecodeTag(x uint64) (Number, Type) {
481 num := Number(x >> 3)
482 if num > MaxValidNumber {
483 num = -1
484 }
485 return num, Type(x & 7)
486}
487
488// EncodeTag encodes the field Number and wire Type into its unified form.
489func EncodeTag(num Number, typ Type) uint64 {
490 return uint64(num)<<3 | uint64(typ&7)
491}
492
493// DecodeZigZag decodes a zig-zag-encoded uint64 as an int64.
494// Input: {…, 5, 3, 1, 0, 2, 4, 6, …}
495// Output: {…, -3, -2, -1, 0, +1, +2, +3, …}
496func DecodeZigZag(x uint64) int64 {
497 return int64(x>>1) ^ int64(x)<<63>>63
498}
499
500// EncodeZigZag encodes an int64 as a zig-zag-encoded uint64.
501// Input: {…, -3, -2, -1, 0, +1, +2, +3, …}
502// Output: {…, 5, 3, 1, 0, 2, 4, 6, …}
503func EncodeZigZag(x int64) uint64 {
504 return uint64(x<<1) ^ uint64(x>>63)
505}
506
507// DecodeBool decodes a uint64 as a bool.
508// Input: { 0, 1, 2, …}
509// Output: {false, true, true, …}
510func DecodeBool(x uint64) bool {
511 return x != 0
512}
513
514// EncodeBool encodes a bool as a uint64.
515// Input: {false, true}
516// Output: { 0, 1}
517func EncodeBool(x bool) uint64 {
518 if x {
519 return 1
520 }
521 return 0
522}