Joe Tsai | 22505a4 | 2018-08-01 13:12:49 -0700 | [diff] [blame^] | 1 | // Copyright 2018 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package wire parses and formats the protobuf wire encoding. |
| 6 | // |
| 7 | // See https://developers.google.com/protocol-buffers/docs/encoding. |
| 8 | package wire |
| 9 | |
| 10 | import ( |
| 11 | "errors" // TODO: replace with our own errors package? |
| 12 | "io" |
| 13 | "math/bits" |
| 14 | ) |
| 15 | |
| 16 | // Number represents the field number. |
| 17 | type Number int32 |
| 18 | |
| 19 | const ( |
| 20 | MinValidNumber Number = 1 |
| 21 | FirstReservedNumber Number = 19000 |
| 22 | LastReservedNumber Number = 19999 |
| 23 | MaxValidNumber Number = 1<<29 - 1 |
| 24 | ) |
| 25 | |
| 26 | // IsValid reports whether the field number is semantically valid. |
| 27 | // |
| 28 | // Note that while numbers within the reserved range are semantically invalid, |
| 29 | // they are syntactically valid in the wire format. |
| 30 | // Implementations may treat records with reserved field numbers as unknown. |
| 31 | func (n Number) IsValid() bool { |
| 32 | return MinValidNumber <= n && n < FirstReservedNumber || LastReservedNumber < n && n <= MaxValidNumber |
| 33 | } |
| 34 | |
| 35 | // Type represents the wire type. |
| 36 | type Type int8 |
| 37 | |
| 38 | const ( |
| 39 | VarintType Type = 0 |
| 40 | Fixed32Type Type = 5 |
| 41 | Fixed64Type Type = 1 |
| 42 | BytesType Type = 2 |
| 43 | StartGroupType Type = 3 |
| 44 | EndGroupType Type = 4 |
| 45 | ) |
| 46 | |
| 47 | const ( |
| 48 | _ = -iota |
| 49 | errCodeTruncated |
| 50 | errCodeFieldNumber |
| 51 | errCodeOverflow |
| 52 | errCodeReserved |
| 53 | errCodeEndGroup |
| 54 | ) |
| 55 | |
| 56 | var ( |
| 57 | errFieldNumber = errors.New("invalid field number") |
| 58 | errOverflow = errors.New("variable length integer overflow") |
| 59 | errReserved = errors.New("cannot parse reserved wire type") |
| 60 | errEndGroup = errors.New("mismatching end group marker") |
| 61 | errParse = errors.New("parse error") |
| 62 | ) |
| 63 | |
| 64 | // ParseError converts an error code into an error value. |
| 65 | // This returns nil if n is a non-negative number. |
| 66 | func ParseError(n int) error { |
| 67 | if n >= 0 { |
| 68 | return nil |
| 69 | } |
| 70 | switch n { |
| 71 | case errCodeTruncated: |
| 72 | return io.ErrUnexpectedEOF |
| 73 | case errCodeFieldNumber: |
| 74 | return errFieldNumber |
| 75 | case errCodeOverflow: |
| 76 | return errOverflow |
| 77 | case errCodeReserved: |
| 78 | return errReserved |
| 79 | case errCodeEndGroup: |
| 80 | return errEndGroup |
| 81 | default: |
| 82 | return errParse |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | // ConsumeField parses an entire field record (both tag and value) and returns |
| 87 | // the field number, the wire type, and the total length. |
| 88 | // This returns a negative length upon an error (see ParseError). |
| 89 | // |
| 90 | // The total length includes the tag header and the end group marker (if the |
| 91 | // field is a group). |
| 92 | func ConsumeField(b []byte) (Number, Type, int) { |
| 93 | num, typ, n := ConsumeTag(b) |
| 94 | if n < 0 { |
| 95 | return 0, 0, n // forward error code |
| 96 | } |
| 97 | m := ConsumeFieldValue(num, typ, b[n:]) |
| 98 | if m < 0 { |
| 99 | return 0, 0, m // forward error code |
| 100 | } |
| 101 | return num, typ, n + m |
| 102 | } |
| 103 | |
| 104 | // ConsumeFieldValue parses a field value and returns its length. |
| 105 | // This assumes that the field Number and wire Type have already been parsed. |
| 106 | // This returns a negative length upon an error (see ParseError). |
| 107 | // |
| 108 | // When parsing a group, the length includes the end group marker and |
| 109 | // the end group is verified to match the starting field number. |
| 110 | func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) { |
| 111 | switch typ { |
| 112 | case VarintType: |
| 113 | _, n = ConsumeVarint(b) |
| 114 | return n |
| 115 | case Fixed32Type: |
| 116 | _, n = ConsumeFixed32(b) |
| 117 | return n |
| 118 | case Fixed64Type: |
| 119 | _, n = ConsumeFixed64(b) |
| 120 | return n |
| 121 | case BytesType: |
| 122 | _, n = ConsumeBytes(b) |
| 123 | return n |
| 124 | case StartGroupType: |
| 125 | n0 := len(b) |
| 126 | for { |
| 127 | num2, typ2, n := ConsumeTag(b) |
| 128 | if n < 0 { |
| 129 | return n // forward error code |
| 130 | } |
| 131 | b = b[n:] |
| 132 | if typ2 == EndGroupType { |
| 133 | if num != num2 { |
| 134 | return errCodeEndGroup |
| 135 | } |
| 136 | return n0 - len(b) |
| 137 | } |
| 138 | |
| 139 | n = ConsumeFieldValue(num2, typ2, b) |
| 140 | if n < 0 { |
| 141 | return n // forward error code |
| 142 | } |
| 143 | b = b[n:] |
| 144 | } |
| 145 | case EndGroupType: |
| 146 | return errCodeEndGroup |
| 147 | default: |
| 148 | return errCodeReserved |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | // AppendTag encodes num and typ as a varint-encoded tag and appends it to b. |
| 153 | func AppendTag(b []byte, num Number, typ Type) []byte { |
| 154 | return AppendVarint(b, EncodeTag(num, typ)) |
| 155 | } |
| 156 | |
| 157 | // ConsumeTag parses b as a varint-encoded tag, reporting its length. |
| 158 | // This returns a negative length upon an error (see ParseError). |
| 159 | func ConsumeTag(b []byte) (Number, Type, int) { |
| 160 | v, n := ConsumeVarint(b) |
| 161 | if n < 0 { |
| 162 | return 0, 0, n // forward error code |
| 163 | } |
| 164 | num, typ := DecodeTag(v) |
| 165 | if num < MinValidNumber { |
| 166 | return 0, 0, errCodeFieldNumber |
| 167 | } |
| 168 | return num, typ, n |
| 169 | } |
| 170 | |
| 171 | func SizeTag(num Number) int { |
| 172 | return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size |
| 173 | } |
| 174 | |
| 175 | // AppendVarint appends v to b as a varint-encoded uint64. |
| 176 | func AppendVarint(b []byte, v uint64) []byte { |
| 177 | // TODO: Specialize for sizes 1 and 2 with mid-stack inlining. |
| 178 | switch { |
| 179 | case v < 1<<7: |
| 180 | b = append(b, byte(v)) |
| 181 | case v < 1<<14: |
| 182 | b = append(b, |
| 183 | byte((v>>0)&0x7f|0x80), |
| 184 | byte(v>>7)) |
| 185 | case v < 1<<21: |
| 186 | b = append(b, |
| 187 | byte((v>>0)&0x7f|0x80), |
| 188 | byte((v>>7)&0x7f|0x80), |
| 189 | byte(v>>14)) |
| 190 | case v < 1<<28: |
| 191 | b = append(b, |
| 192 | byte((v>>0)&0x7f|0x80), |
| 193 | byte((v>>7)&0x7f|0x80), |
| 194 | byte((v>>14)&0x7f|0x80), |
| 195 | byte(v>>21)) |
| 196 | case v < 1<<35: |
| 197 | b = append(b, |
| 198 | byte((v>>0)&0x7f|0x80), |
| 199 | byte((v>>7)&0x7f|0x80), |
| 200 | byte((v>>14)&0x7f|0x80), |
| 201 | byte((v>>21)&0x7f|0x80), |
| 202 | byte(v>>28)) |
| 203 | case v < 1<<42: |
| 204 | b = append(b, |
| 205 | byte((v>>0)&0x7f|0x80), |
| 206 | byte((v>>7)&0x7f|0x80), |
| 207 | byte((v>>14)&0x7f|0x80), |
| 208 | byte((v>>21)&0x7f|0x80), |
| 209 | byte((v>>28)&0x7f|0x80), |
| 210 | byte(v>>35)) |
| 211 | case v < 1<<49: |
| 212 | b = append(b, |
| 213 | byte((v>>0)&0x7f|0x80), |
| 214 | byte((v>>7)&0x7f|0x80), |
| 215 | byte((v>>14)&0x7f|0x80), |
| 216 | byte((v>>21)&0x7f|0x80), |
| 217 | byte((v>>28)&0x7f|0x80), |
| 218 | byte((v>>35)&0x7f|0x80), |
| 219 | byte(v>>42)) |
| 220 | case v < 1<<56: |
| 221 | b = append(b, |
| 222 | byte((v>>0)&0x7f|0x80), |
| 223 | byte((v>>7)&0x7f|0x80), |
| 224 | byte((v>>14)&0x7f|0x80), |
| 225 | byte((v>>21)&0x7f|0x80), |
| 226 | byte((v>>28)&0x7f|0x80), |
| 227 | byte((v>>35)&0x7f|0x80), |
| 228 | byte((v>>42)&0x7f|0x80), |
| 229 | byte(v>>49)) |
| 230 | case v < 1<<63: |
| 231 | b = append(b, |
| 232 | byte((v>>0)&0x7f|0x80), |
| 233 | byte((v>>7)&0x7f|0x80), |
| 234 | byte((v>>14)&0x7f|0x80), |
| 235 | byte((v>>21)&0x7f|0x80), |
| 236 | byte((v>>28)&0x7f|0x80), |
| 237 | byte((v>>35)&0x7f|0x80), |
| 238 | byte((v>>42)&0x7f|0x80), |
| 239 | byte((v>>49)&0x7f|0x80), |
| 240 | byte(v>>56)) |
| 241 | default: |
| 242 | b = append(b, |
| 243 | byte((v>>0)&0x7f|0x80), |
| 244 | byte((v>>7)&0x7f|0x80), |
| 245 | byte((v>>14)&0x7f|0x80), |
| 246 | byte((v>>21)&0x7f|0x80), |
| 247 | byte((v>>28)&0x7f|0x80), |
| 248 | byte((v>>35)&0x7f|0x80), |
| 249 | byte((v>>42)&0x7f|0x80), |
| 250 | byte((v>>49)&0x7f|0x80), |
| 251 | byte((v>>56)&0x7f|0x80), |
| 252 | 1) |
| 253 | } |
| 254 | return b |
| 255 | } |
| 256 | |
| 257 | // ConsumeVarint parses b as a varint-encoded uint64, reporting its length. |
| 258 | // This returns a negative length upon an error (see ParseError). |
| 259 | func ConsumeVarint(b []byte) (v uint64, n int) { |
| 260 | // TODO: Specialize for sizes 1 and 2 with mid-stack inlining. |
| 261 | var y uint64 |
| 262 | if len(b) <= 0 { |
| 263 | return 0, errCodeTruncated |
| 264 | } |
| 265 | v = uint64(b[0]) |
| 266 | if v < 0x80 { |
| 267 | return v, 1 |
| 268 | } |
| 269 | v -= 0x80 |
| 270 | |
| 271 | if len(b) <= 1 { |
| 272 | return 0, errCodeTruncated |
| 273 | } |
| 274 | y = uint64(b[1]) |
| 275 | v += y << 7 |
| 276 | if y < 0x80 { |
| 277 | return v, 2 |
| 278 | } |
| 279 | v -= 0x80 << 7 |
| 280 | |
| 281 | if len(b) <= 2 { |
| 282 | return 0, errCodeTruncated |
| 283 | } |
| 284 | y = uint64(b[2]) |
| 285 | v += y << 14 |
| 286 | if y < 0x80 { |
| 287 | return v, 3 |
| 288 | } |
| 289 | v -= 0x80 << 14 |
| 290 | |
| 291 | if len(b) <= 3 { |
| 292 | return 0, errCodeTruncated |
| 293 | } |
| 294 | y = uint64(b[3]) |
| 295 | v += y << 21 |
| 296 | if y < 0x80 { |
| 297 | return v, 4 |
| 298 | } |
| 299 | v -= 0x80 << 21 |
| 300 | |
| 301 | if len(b) <= 4 { |
| 302 | return 0, errCodeTruncated |
| 303 | } |
| 304 | y = uint64(b[4]) |
| 305 | v += y << 28 |
| 306 | if y < 0x80 { |
| 307 | return v, 5 |
| 308 | } |
| 309 | v -= 0x80 << 28 |
| 310 | |
| 311 | if len(b) <= 5 { |
| 312 | return 0, errCodeTruncated |
| 313 | } |
| 314 | y = uint64(b[5]) |
| 315 | v += y << 35 |
| 316 | if y < 0x80 { |
| 317 | return v, 6 |
| 318 | } |
| 319 | v -= 0x80 << 35 |
| 320 | |
| 321 | if len(b) <= 6 { |
| 322 | return 0, errCodeTruncated |
| 323 | } |
| 324 | y = uint64(b[6]) |
| 325 | v += y << 42 |
| 326 | if y < 0x80 { |
| 327 | return v, 7 |
| 328 | } |
| 329 | v -= 0x80 << 42 |
| 330 | |
| 331 | if len(b) <= 7 { |
| 332 | return 0, errCodeTruncated |
| 333 | } |
| 334 | y = uint64(b[7]) |
| 335 | v += y << 49 |
| 336 | if y < 0x80 { |
| 337 | return v, 8 |
| 338 | } |
| 339 | v -= 0x80 << 49 |
| 340 | |
| 341 | if len(b) <= 8 { |
| 342 | return 0, errCodeTruncated |
| 343 | } |
| 344 | y = uint64(b[8]) |
| 345 | v += y << 56 |
| 346 | if y < 0x80 { |
| 347 | return v, 9 |
| 348 | } |
| 349 | v -= 0x80 << 56 |
| 350 | |
| 351 | if len(b) <= 9 { |
| 352 | return 0, errCodeTruncated |
| 353 | } |
| 354 | y = uint64(b[9]) |
| 355 | v += y << 63 |
| 356 | if y < 2 { |
| 357 | return v, 10 |
| 358 | } |
| 359 | return 0, errCodeOverflow |
| 360 | } |
| 361 | |
| 362 | // SizeVarint returns the encoded size of a varint. |
| 363 | // The size is guaranteed to be within 1 and 10, inclusive. |
| 364 | func SizeVarint(v uint64) int { |
| 365 | return 1 + (bits.Len64(v)-1)/7 |
| 366 | } |
| 367 | |
| 368 | // AppendFixed32 appends v to b as a little-endian uint32. |
| 369 | func AppendFixed32(b []byte, v uint32) []byte { |
| 370 | return append(b, |
| 371 | byte(v>>0), |
| 372 | byte(v>>8), |
| 373 | byte(v>>16), |
| 374 | byte(v>>24)) |
| 375 | } |
| 376 | |
| 377 | // ConsumeFixed32 parses b as a little-endian uint32, reporting its length. |
| 378 | // This returns a negative length upon an error (see ParseError). |
| 379 | func ConsumeFixed32(b []byte) (v uint32, n int) { |
| 380 | if len(b) < 4 { |
| 381 | return 0, errCodeTruncated |
| 382 | } |
| 383 | v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 |
| 384 | return v, 4 |
| 385 | } |
| 386 | |
| 387 | // SizeFixed32 returns the encoded size of a fixed32; which is always 4. |
| 388 | func SizeFixed32() int { |
| 389 | return 4 |
| 390 | } |
| 391 | |
| 392 | // AppendFixed64 appends v to b as a little-endian uint64. |
| 393 | func AppendFixed64(b []byte, v uint64) []byte { |
| 394 | return append(b, |
| 395 | byte(v>>0), |
| 396 | byte(v>>8), |
| 397 | byte(v>>16), |
| 398 | byte(v>>24), |
| 399 | byte(v>>32), |
| 400 | byte(v>>40), |
| 401 | byte(v>>48), |
| 402 | byte(v>>56)) |
| 403 | } |
| 404 | |
| 405 | // ConsumeFixed64 parses b as a little-endian uint64, reporting its length. |
| 406 | // This returns a negative length upon an error (see ParseError). |
| 407 | func ConsumeFixed64(b []byte) (v uint64, n int) { |
| 408 | if len(b) < 8 { |
| 409 | return 0, errCodeTruncated |
| 410 | } |
| 411 | v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 |
| 412 | return v, 8 |
| 413 | } |
| 414 | |
| 415 | // SizeFixed64 returns the encoded size of a fixed64; which is always 8. |
| 416 | func SizeFixed64() int { |
| 417 | return 8 |
| 418 | } |
| 419 | |
| 420 | // AppendBytes appends v to b as a length-prefixed bytes value. |
| 421 | func AppendBytes(b []byte, v []byte) []byte { |
| 422 | return append(AppendVarint(b, uint64(len(v))), v...) |
| 423 | } |
| 424 | |
| 425 | // ConsumeBytes parses b as a length-prefixed bytes value, reporting its length. |
| 426 | // This returns a negative length upon an error (see ParseError). |
| 427 | func ConsumeBytes(b []byte) (v []byte, n int) { |
| 428 | m, n := ConsumeVarint(b) |
| 429 | if n < 0 { |
| 430 | return nil, n // forward error code |
| 431 | } |
| 432 | if m > uint64(len(b[n:])) { |
| 433 | return nil, errCodeTruncated |
| 434 | } |
| 435 | return b[n:][:m], n + int(m) |
| 436 | } |
| 437 | |
| 438 | // SizeBytes returns the encoded size of a length-prefixed bytes value, |
| 439 | // given only the length. |
| 440 | func SizeBytes(n int) int { |
| 441 | return SizeVarint(uint64(n)) + n |
| 442 | } |
| 443 | |
| 444 | // AppendGroup appends v to b as group value, with a trailing end group marker. |
| 445 | // The value v must not contain the end marker. |
| 446 | func AppendGroup(b []byte, num Number, v []byte) []byte { |
| 447 | return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType)) |
| 448 | } |
| 449 | |
| 450 | // ConsumeGroup parses b as a group value until the trailing end group marker, |
| 451 | // and verifies that the end marker matches the provided num. The value v |
| 452 | // does not contain the end marker, while the length does contain the end marker. |
| 453 | // This returns a negative length upon an error (see ParseError). |
| 454 | func ConsumeGroup(num Number, b []byte) (v []byte, n int) { |
| 455 | n = ConsumeFieldValue(num, StartGroupType, b) |
| 456 | if n < 0 { |
| 457 | return nil, n // forward error code |
| 458 | } |
| 459 | b = b[:n] |
| 460 | |
| 461 | // Truncate off end group marker, but need to handle denormalized varints. |
| 462 | // Assuming end marker is never 0 (which is always the case since |
| 463 | // EndGroupType is non-zero), we can truncate all trailing bytes where the |
| 464 | // lower 7 bits are all zero (implying that the varint is denormalized). |
| 465 | for len(b) > 0 && b[len(b)-1]&0x7f == 0 { |
| 466 | b = b[:len(b)-1] |
| 467 | } |
| 468 | b = b[:len(b)-SizeTag(num)] |
| 469 | return b, n |
| 470 | } |
| 471 | |
| 472 | // SizeGroup returns the encoded size of a group, given only the length. |
| 473 | func SizeGroup(num Number, n int) int { |
| 474 | return n + SizeTag(num) |
| 475 | } |
| 476 | |
| 477 | // DecodeTag decodes the field Number and wire Type from its unified form. |
| 478 | // The Number is -1 if the decoded field number overflows. |
| 479 | // Other than overflow, this does not check for field number validity. |
| 480 | func DecodeTag(x uint64) (Number, Type) { |
| 481 | num := Number(x >> 3) |
| 482 | if num > MaxValidNumber { |
| 483 | num = -1 |
| 484 | } |
| 485 | return num, Type(x & 7) |
| 486 | } |
| 487 | |
| 488 | // EncodeTag encodes the field Number and wire Type into its unified form. |
| 489 | func EncodeTag(num Number, typ Type) uint64 { |
| 490 | return uint64(num)<<3 | uint64(typ&7) |
| 491 | } |
| 492 | |
| 493 | // DecodeZigZag decodes a zig-zag-encoded uint64 as an int64. |
| 494 | // Input: {…, 5, 3, 1, 0, 2, 4, 6, …} |
| 495 | // Output: {…, -3, -2, -1, 0, +1, +2, +3, …} |
| 496 | func DecodeZigZag(x uint64) int64 { |
| 497 | return int64(x>>1) ^ int64(x)<<63>>63 |
| 498 | } |
| 499 | |
| 500 | // EncodeZigZag encodes an int64 as a zig-zag-encoded uint64. |
| 501 | // Input: {…, -3, -2, -1, 0, +1, +2, +3, …} |
| 502 | // Output: {…, 5, 3, 1, 0, 2, 4, 6, …} |
| 503 | func EncodeZigZag(x int64) uint64 { |
| 504 | return uint64(x<<1) ^ uint64(x>>63) |
| 505 | } |
| 506 | |
| 507 | // DecodeBool decodes a uint64 as a bool. |
| 508 | // Input: { 0, 1, 2, …} |
| 509 | // Output: {false, true, true, …} |
| 510 | func DecodeBool(x uint64) bool { |
| 511 | return x != 0 |
| 512 | } |
| 513 | |
| 514 | // EncodeBool encodes a bool as a uint64. |
| 515 | // Input: {false, true} |
| 516 | // Output: { 0, 1} |
| 517 | func EncodeBool(x bool) uint64 { |
| 518 | if x { |
| 519 | return 1 |
| 520 | } |
| 521 | return 0 |
| 522 | } |