blob: a6bab2fac2a632affda114a391312b8a62c22ac2 [file] [log] [blame]
Rob Pikeaaa3a622010-03-20 22:32:34 -07001// Go support for Protocol Buffers - Google's data interchange format
2//
David Symondsee6e9c52012-11-29 08:51:07 +11003// Copyright 2010 The Go Authors. All rights reserved.
David Symonds558f13f2014-11-24 10:28:53 +11004// https://github.com/golang/protobuf
Rob Pikeaaa3a622010-03-20 22:32:34 -07005//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
34// Functions for parsing the Text protocol buffer format.
David Symonds54531052011-12-08 12:00:31 +110035// TODO: message sets.
Rob Pikeaaa3a622010-03-20 22:32:34 -070036
37import (
David Symonds81177532014-11-20 14:33:40 +110038 "encoding"
David Symondsfa94a1e2012-09-24 13:21:49 +100039 "errors"
Rob Pikeaaa3a622010-03-20 22:32:34 -070040 "fmt"
Rob Pikeaaa3a622010-03-20 22:32:34 -070041 "reflect"
42 "strconv"
David Symonds183124e2012-03-23 13:20:23 +110043 "strings"
David Symondsfa94a1e2012-09-24 13:21:49 +100044 "unicode/utf8"
Rob Pikeaaa3a622010-03-20 22:32:34 -070045)
46
// anyRepeatedlyUnpacked is the error format emitted while parsing an expanded
// Any ("[type/url] < ... >") when the field named by %q ("type_url" or
// "value") has already been set in the enclosing message, for example because
// the Any was expanded more than once.
const anyRepeatedlyUnpacked = "Any message unpacked multiple times, or %q already set"
49
// ParseError describes a failure encountered while parsing text-format input.
type ParseError struct {
	Message string
	Line    int // 1-based line number
	Offset  int // 0-based byte offset from start of input
}

// Error renders the parse error as "line N: message". For errors on the
// first line the byte offset is included as well ("line 1.OFFSET: message"),
// because only there does the offset coincide with the column.
func (p *ParseError) Error() string {
	if p.Line != 1 {
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
	// Only the first line shows the offset.
	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
63
64type token struct {
65 value string
66 err *ParseError
67 line int // line number
68 offset int // byte number from start of input, not start of line
69 unquoted string // the unquoted version of value, if it was a quoted string
70}
71
72func (t *token) String() string {
73 if t.err == nil {
74 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
75 }
76 return fmt.Sprintf("parse error: %v", t.err)
77}
78
// textParser is a tokenizer plus recursive-descent parser for the protocol
// buffer text format. It holds the unconsumed input together with the most
// recently scanned token.
type textParser struct {
	s            string // remaining input
	done         bool   // whether the parsing is finished (success or error)
	backed       bool   // whether back() was called
	offset, line int    // bytes consumed so far, and current line (starts at 1)
	cur          token  // most recently scanned token; next() returns a pointer to it
}
86
87func newTextParser(s string) *textParser {
88 p := new(textParser)
89 p.s = s
90 p.line = 1
91 p.cur.line = 1
92 return p
93}
94
Rob Piked6420b82011-04-13 16:37:04 -070095func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070096 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070097 p.cur.err = pe
98 p.done = true
99 return pe
100}
101
// isIdentOrNumberChar reports whether c may appear in an identifier or
// number token, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') {
		return true
	}
	return c == '-' || c == '+' || c == '.' || c == '_'
}
116
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
124
// isQuote reports whether c is a double or single quote character.
func isQuote(c byte) bool {
	return c == '"' || c == '\''
}
132
Rob Pikeaaa3a622010-03-20 22:32:34 -0700133func (p *textParser) skipWhitespace() {
134 i := 0
135 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
136 if p.s[i] == '#' {
137 // comment; skip to end of line or input
138 for i < len(p.s) && p.s[i] != '\n' {
139 i++
140 }
141 if i == len(p.s) {
142 break
143 }
144 }
145 if p.s[i] == '\n' {
146 p.line++
147 }
148 i++
149 }
150 p.offset += i
151 p.s = p.s[i:len(p.s)]
152 if len(p.s) == 0 {
153 p.done = true
154 }
155}
156
157func (p *textParser) advance() {
158 // Skip whitespace
159 p.skipWhitespace()
160 if p.done {
161 return
162 }
163
164 // Start of non-whitespace
165 p.cur.err = nil
166 p.cur.offset, p.cur.line = p.offset, p.line
167 p.cur.unquoted = ""
168 switch p.s[0] {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100169 case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700170 // Single symbol
171 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
David Symonds162d0032012-06-28 09:44:46 -0700172 case '"', '\'':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700173 // Quoted string
174 i := 1
David Symonds162d0032012-06-28 09:44:46 -0700175 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700176 if p.s[i] == '\\' && i+1 < len(p.s) {
177 // skip escaped char
178 i++
179 }
180 i++
181 }
David Symonds162d0032012-06-28 09:44:46 -0700182 if i >= len(p.s) || p.s[i] != p.s[0] {
Rob Piked6420b82011-04-13 16:37:04 -0700183 p.errorf("unmatched quote")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700184 return
185 }
David Symondsfa94a1e2012-09-24 13:21:49 +1000186 unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700187 if err != nil {
David Symondsbafa7bc2015-07-01 07:59:00 +1000188 p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700189 return
190 }
191 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
192 p.cur.unquoted = unq
193 default:
194 i := 0
195 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
196 i++
197 }
198 if i == 0 {
Rob Piked6420b82011-04-13 16:37:04 -0700199 p.errorf("unexpected byte %#x", p.s[0])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700200 return
201 }
202 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
203 }
204 p.offset += len(p.cur.value)
205}
206
// Errors reported while unquoting string literals.
var (
	errBadUTF8 = errors.New("proto: bad UTF-8")
	errBadHex  = errors.New("proto: bad hexadecimal")
)

// unquoteC interprets the escape sequences in s, the contents of a quoted
// string with the surrounding quote characters already removed. quote is the
// quote character that delimited the string. It returns the unescaped text,
// or an error if s contains invalid UTF-8 or a malformed escape.
func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is *not* parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
		if r == '\\' || r == quote {
			simple = false
			break
		}
	}
	if simple {
		return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			return "", errBadUTF8
		}
		s = s[n:]
		if r != '\\' {
			if r < utf8.RuneSelf {
				buf = append(buf, byte(r))
			} else {
				buf = append(buf, string(r)...)
			}
			continue
		}

		// A backslash introduces an escape; unescape consumes the rest of it.
		ch, tail, err := unescape(s)
		if err != nil {
			return "", err
		}
		buf = append(buf, ch...)
		s = tail
	}
	return string(buf), nil
}

// unescape decodes one escape sequence. s is the input immediately after the
// backslash. It returns the decoded bytes as ch and the unconsumed remainder
// of s as tail.
//
// Supported escapes: \a \b \f \n \r \t \v \? \' \" \\, a three-digit octal
// byte (\NNN), a two-digit hex byte (\xHH or \XHH), and the Unicode escapes
// \uXXXX and \UXXXXXXXX, which are emitted as the UTF-8 encoding of the code
// point. Surrogates and values above U+10FFFF are rejected.
func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
		return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
		return "\a", s, nil
	case 'b':
		return "\b", s, nil
	case 'f':
		return "\f", s, nil
	case 'n':
		return "\n", s, nil
	case 'r':
		return "\r", s, nil
	case 't':
		return "\t", s, nil
	case 'v':
		return "\v", s, nil
	case '?':
		return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
		return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
		if len(s) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		base := 8
		ss := s[:2]
		s = s[2:]
		if r == 'x' || r == 'X' {
			base = 16
		} else {
			// For octal the leading digit is part of the number.
			ss = string(r) + ss
		}
		i, err := strconv.ParseUint(ss, base, 8)
		if err != nil {
			return "", "", err
		}
		return string([]byte{byte(i)}), s, nil
	case 'u', 'U':
		n := 4
		if r == 'U' {
			n = 8
		}
		if len(s) < n {
			return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
		}

		// Accumulate the code point from its hex digits. uint32 is wide
		// enough for eight digits, so this cannot overflow.
		var cp uint32
		for i := 0; i < n; i++ {
			d, ok := unhex(s[i])
			if !ok {
				return "", "", errBadHex
			}
			cp = cp<<4 | uint32(d)
		}
		if cp > utf8.MaxRune || !utf8.ValidRune(rune(cp)) {
			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, s[:n])
		}
		// Emit the code point as UTF-8, not as its raw hex bytes.
		return string(rune(cp)), s[n:], nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
}

// unhex returns the numeric value of the hexadecimal digit b; ok is false if
// b is not a hex digit.
//
// Adapted from src/pkg/strconv/quote.go.
func unhex(b byte) (v byte, ok bool) {
	switch {
	case '0' <= b && b <= '9':
		return b - '0', true
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10, true
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10, true
	}
	return 0, false
}
333
Rob Pikeaaa3a622010-03-20 22:32:34 -0700334// Back off the parser by one token. Can only be done between calls to next().
335// It makes the next advance() a no-op.
336func (p *textParser) back() { p.backed = true }
337
338// Advances the parser and returns the new current token.
339func (p *textParser) next() *token {
340 if p.backed || p.done {
341 p.backed = false
342 return &p.cur
343 }
344 p.advance()
345 if p.done {
346 p.cur.value = ""
Daniel Kraftb9827042016-02-02 18:33:00 +1100347 } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700348 // Look for multiple quoted strings separated by whitespace,
349 // and concatenate them.
350 cat := p.cur
351 for {
352 p.skipWhitespace()
Daniel Kraftb9827042016-02-02 18:33:00 +1100353 if p.done || !isQuote(p.s[0]) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700354 break
355 }
356 p.advance()
357 if p.cur.err != nil {
358 return &p.cur
359 }
360 cat.value += " " + p.cur.value
361 cat.unquoted += p.cur.unquoted
362 }
363 p.done = false // parser may have seen EOF, but we want to return cat
364 p.cur = cat
365 }
366 return &p.cur
367}
368
David Symonds3ea3e052014-12-22 16:15:28 +1100369func (p *textParser) consumeToken(s string) error {
370 tok := p.next()
371 if tok.err != nil {
372 return tok.err
373 }
374 if tok.value != s {
375 p.back()
376 return p.errorf("expected %q, found %q", s, tok.value)
377 }
378 return nil
379}
380
David Symonds2a1c6b92014-10-12 16:42:41 +1100381// Return a RequiredNotSetError indicating which required field was not set.
382func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
Rob Pike97e934d2011-04-11 12:52:49 -0700383 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700384 sprops := GetProperties(st)
385 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700386 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700387 continue
388 }
389
390 props := sprops.Prop[i]
391 if props.Required {
David Symonds2a1c6b92014-10-12 16:42:41 +1100392 return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
Rob Pikeaaa3a622010-03-20 22:32:34 -0700393 }
394 }
David Symonds2a1c6b92014-10-12 16:42:41 +1100395 return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
Rob Pikeaaa3a622010-03-20 22:32:34 -0700396}
397
398// Returns the index in the struct for the named field, as well as the parsed tag properties.
David Symonds59b73b32015-08-24 13:22:02 +1000399func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
David Symonds2bba1b22012-09-26 14:53:08 +1000400 i, ok := sprops.decoderOrigNames[name]
David Symonds79eae332010-10-16 11:33:20 +1100401 if ok {
402 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700403 }
404 return -1, nil, false
405}
406
David Symonds54531052011-12-08 12:00:31 +1100407// Consume a ':' from the input stream (if the next token is a colon),
408// returning an error if a colon is needed but not present.
409func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
410 tok := p.next()
411 if tok.err != nil {
412 return tok.err
413 }
414 if tok.value != ":" {
415 // Colon is optional when the field is a group or message.
416 needColon := true
417 switch props.Wire {
418 case "group":
419 needColon = false
420 case "bytes":
421 // A "bytes" field is either a message, a string, or a repeated field;
422 // those three become *T, *string and []T respectively, so we can check for
423 // this field being a pointer to a non-string.
424 if typ.Kind() == reflect.Ptr {
425 // *T or *string
426 if typ.Elem().Kind() == reflect.String {
427 break
428 }
429 } else if typ.Kind() == reflect.Slice {
430 // []T or []*T
431 if typ.Elem().Kind() != reflect.Ptr {
432 break
433 }
David Symondsabd3b412014-11-28 11:43:44 +1100434 } else if typ.Kind() == reflect.String {
435 // The proto3 exception is for a string field,
436 // which requires a colon.
437 break
David Symonds54531052011-12-08 12:00:31 +1100438 }
439 needColon = false
440 }
441 if needColon {
442 return p.errorf("expected ':', found %q", tok.value)
443 }
444 p.back()
445 }
446 return nil
447}
448
David Symonds2a1c6b92014-10-12 16:42:41 +1100449func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
Rob Pike97e934d2011-04-11 12:52:49 -0700450 st := sv.Type()
David Symonds59b73b32015-08-24 13:22:02 +1000451 sprops := GetProperties(st)
452 reqCount := sprops.reqCount
David Symonds2a1c6b92014-10-12 16:42:41 +1100453 var reqFieldErr error
David Symonds8a099d02014-10-30 12:40:51 +1100454 fieldSet := make(map[string]bool)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700455 // A struct is a sequence of "name: value", terminated by one of
David Symonds54531052011-12-08 12:00:31 +1100456 // '>' or '}', or the end of the input. A name may also be
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100457 // "[extension]" or "[type/url]".
458 //
459 // The whole struct can also be an expanded Any message, like:
460 // [type/url] < ... struct contents ... >
Rob Pikeaaa3a622010-03-20 22:32:34 -0700461 for {
462 tok := p.next()
463 if tok.err != nil {
464 return tok.err
465 }
466 if tok.value == terminator {
467 break
468 }
David Symonds54531052011-12-08 12:00:31 +1100469 if tok.value == "[" {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100470 // Looks like an extension or an Any.
David Symonds54531052011-12-08 12:00:31 +1100471 //
472 // TODO: Check whether we need to handle
473 // namespace rooted names (e.g. ".something.Foo").
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100474 extName, err := p.consumeExtName()
475 if err != nil {
476 return err
David Symonds54531052011-12-08 12:00:31 +1100477 }
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100478
479 if s := strings.LastIndex(extName, "/"); s >= 0 {
480 // If it contains a slash, it's an Any type URL.
481 messageName := extName[s+1:]
482 mt := MessageType(messageName)
483 if mt == nil {
484 return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
485 }
486 tok = p.next()
487 if tok.err != nil {
488 return tok.err
489 }
490 // consume an optional colon
491 if tok.value == ":" {
492 tok = p.next()
493 if tok.err != nil {
494 return tok.err
495 }
496 }
497 var terminator string
498 switch tok.value {
499 case "<":
500 terminator = ">"
501 case "{":
502 terminator = "}"
503 default:
504 return p.errorf("expected '{' or '<', found %q", tok.value)
505 }
506 v := reflect.New(mt.Elem())
507 if pe := p.readStruct(v.Elem(), terminator); pe != nil {
508 return pe
509 }
510 b, err := Marshal(v.Interface().(Message))
511 if err != nil {
512 return p.errorf("failed to marshal message of type %q: %v", messageName, err)
513 }
Luke Granger-Brown1687f002016-07-27 05:08:00 -0700514 if fieldSet["type_url"] {
515 return p.errorf(anyRepeatedlyUnpacked, "type_url")
516 }
517 if fieldSet["value"] {
518 return p.errorf(anyRepeatedlyUnpacked, "value")
519 }
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100520 sv.FieldByName("TypeUrl").SetString(extName)
521 sv.FieldByName("Value").SetBytes(b)
Luke Granger-Brown1687f002016-07-27 05:08:00 -0700522 fieldSet["type_url"] = true
523 fieldSet["value"] = true
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100524 continue
525 }
526
David Symonds54531052011-12-08 12:00:31 +1100527 var desc *ExtensionDesc
528 // This could be faster, but it's functional.
529 // TODO: Do something smarter than a linear scan.
David Symonds9f60f432012-06-14 09:45:25 +1000530 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100531 if d.Name == extName {
David Symonds54531052011-12-08 12:00:31 +1100532 desc = d
533 break
Rob Pikeaaa3a622010-03-20 22:32:34 -0700534 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700535 }
David Symonds54531052011-12-08 12:00:31 +1100536 if desc == nil {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100537 return p.errorf("unrecognized extension %q", extName)
David Symonds54531052011-12-08 12:00:31 +1100538 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700539
David Symonds54531052011-12-08 12:00:31 +1100540 props := &Properties{}
541 props.Parse(desc.Tag)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700542
David Symonds54531052011-12-08 12:00:31 +1100543 typ := reflect.TypeOf(desc.ExtensionType)
544 if err := p.checkForColon(props, typ); err != nil {
545 return err
546 }
547
David Symonds61826da2012-05-05 09:31:28 +1000548 rep := desc.repeated()
549
David Symonds54531052011-12-08 12:00:31 +1100550 // Read the extension structure, and set it in
551 // the value we're constructing.
David Symonds61826da2012-05-05 09:31:28 +1000552 var ext reflect.Value
553 if !rep {
554 ext = reflect.New(typ).Elem()
555 } else {
556 ext = reflect.New(typ.Elem()).Elem()
557 }
David Symonds54531052011-12-08 12:00:31 +1100558 if err := p.readAny(ext, props); err != nil {
David Symonds2a1c6b92014-10-12 16:42:41 +1100559 if _, ok := err.(*RequiredNotSetError); !ok {
560 return err
561 }
562 reqFieldErr = err
David Symonds54531052011-12-08 12:00:31 +1100563 }
matloob@google.come51d0022016-05-23 09:09:04 -0400564 ep := sv.Addr().Interface().(Message)
David Symonds61826da2012-05-05 09:31:28 +1000565 if !rep {
566 SetExtension(ep, desc, ext.Interface())
567 } else {
568 old, err := GetExtension(ep, desc)
569 var sl reflect.Value
570 if err == nil {
571 sl = reflect.ValueOf(old) // existing slice
572 } else {
573 sl = reflect.MakeSlice(typ, 0, 1)
574 }
575 sl = reflect.Append(sl, ext)
576 SetExtension(ep, desc, sl.Interface())
577 }
David Symonds59b73b32015-08-24 13:22:02 +1000578 if err := p.consumeOptionalSeparator(); err != nil {
579 return err
580 }
581 continue
582 }
583
584 // This is a normal, non-extension field.
585 name := tok.value
586 var dst reflect.Value
587 fi, props, ok := structFieldByName(sprops, name)
588 if ok {
589 dst = sv.Field(fi)
David Symonds1baed092015-08-25 15:42:00 +1000590 } else if oop, ok := sprops.OneofTypes[name]; ok {
591 // It is a oneof.
592 props = oop.Prop
593 nv := reflect.New(oop.Type.Elem())
594 dst = nv.Elem().Field(0)
Googlera66a4fa2016-11-01 19:59:13 +0000595 field := sv.Field(oop.Field)
596 if !field.IsNil() {
597 return p.errorf("oneof field '%s' is already set", name)
598 }
599 field.Set(nv)
David Symonds59b73b32015-08-24 13:22:02 +1000600 }
601 if !dst.IsValid() {
602 return p.errorf("unknown field name %q in %v", name, st)
603 }
David Symonds54531052011-12-08 12:00:31 +1100604
David Symonds59b73b32015-08-24 13:22:02 +1000605 if dst.Kind() == reflect.Map {
606 // Consume any colon.
607 if err := p.checkForColon(props, dst.Type()); err != nil {
David Symonds54531052011-12-08 12:00:31 +1100608 return err
609 }
610
David Symonds59b73b32015-08-24 13:22:02 +1000611 // Construct the map if it doesn't already exist.
612 if dst.IsNil() {
613 dst.Set(reflect.MakeMap(dst.Type()))
David Symonds54531052011-12-08 12:00:31 +1100614 }
David Symonds59b73b32015-08-24 13:22:02 +1000615 key := reflect.New(dst.Type().Key()).Elem()
616 val := reflect.New(dst.Type().Elem()).Elem()
617
618 // The map entry should be this sequence of tokens:
619 // < key : KEY value : VALUE >
Ross Light11114612016-05-25 19:11:34 -0400620 // However, implementations may omit key or value, and technically
621 // we should support them in any order. See b/28924776 for a time
622 // this went wrong.
David Symonds59b73b32015-08-24 13:22:02 +1000623
624 tok := p.next()
625 var terminator string
626 switch tok.value {
627 case "<":
628 terminator = ">"
629 case "{":
630 terminator = "}"
631 default:
632 return p.errorf("expected '{' or '<', found %q", tok.value)
633 }
Ross Light11114612016-05-25 19:11:34 -0400634 for {
635 tok := p.next()
636 if tok.err != nil {
637 return tok.err
638 }
639 if tok.value == terminator {
640 break
641 }
642 switch tok.value {
643 case "key":
644 if err := p.consumeToken(":"); err != nil {
645 return err
646 }
647 if err := p.readAny(key, props.mkeyprop); err != nil {
648 return err
649 }
650 if err := p.consumeOptionalSeparator(); err != nil {
651 return err
652 }
653 case "value":
654 if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil {
655 return err
656 }
657 if err := p.readAny(val, props.mvalprop); err != nil {
658 return err
659 }
660 if err := p.consumeOptionalSeparator(); err != nil {
661 return err
662 }
663 default:
664 p.back()
665 return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
666 }
David Symonds59b73b32015-08-24 13:22:02 +1000667 }
668
669 dst.SetMapIndex(key, val)
670 continue
671 }
672
673 // Check that it's not already set if it's not a repeated field.
674 if !props.Repeated && fieldSet[name] {
675 return p.errorf("non-repeated field %q was repeated", name)
676 }
677
678 if err := p.checkForColon(props, dst.Type()); err != nil {
679 return err
680 }
681
682 // Parse into the field.
683 fieldSet[name] = true
684 if err := p.readAny(dst, props); err != nil {
685 if _, ok := err.(*RequiredNotSetError); !ok {
686 return err
687 }
688 reqFieldErr = err
Bryan Mills78550bb2016-04-01 08:55:00 +1100689 }
690 if props.Required {
David Symonds59b73b32015-08-24 13:22:02 +1000691 reqCount--
Rob Pikeaaa3a622010-03-20 22:32:34 -0700692 }
David Symondsbe02a4a2012-12-06 15:20:41 +1100693
David Symonds056d5ce2015-05-12 19:27:00 +1000694 if err := p.consumeOptionalSeparator(); err != nil {
695 return err
David Symondsbe02a4a2012-12-06 15:20:41 +1100696 }
David Symonds056d5ce2015-05-12 19:27:00 +1000697
Rob Pikeaaa3a622010-03-20 22:32:34 -0700698 }
699
700 if reqCount > 0 {
701 return p.missingRequiredFieldError(sv)
702 }
David Symonds2a1c6b92014-10-12 16:42:41 +1100703 return reqFieldErr
Rob Pikeaaa3a622010-03-20 22:32:34 -0700704}
705
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100706// consumeExtName consumes extension name or expanded Any type URL and the
707// following ']'. It returns the name or URL consumed.
708func (p *textParser) consumeExtName() (string, error) {
709 tok := p.next()
710 if tok.err != nil {
711 return "", tok.err
712 }
713
714 // If extension name or type url is quoted, it's a single token.
715 if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
716 name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
717 if err != nil {
718 return "", err
719 }
720 return name, p.consumeToken("]")
721 }
722
723 // Consume everything up to "]"
724 var parts []string
725 for tok.value != "]" {
726 parts = append(parts, tok.value)
727 tok = p.next()
728 if tok.err != nil {
729 return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
730 }
731 }
732 return strings.Join(parts, ""), nil
733}
734
David Symonds056d5ce2015-05-12 19:27:00 +1000735// consumeOptionalSeparator consumes an optional semicolon or comma.
736// It is used in readStruct to provide backward compatibility.
737func (p *textParser) consumeOptionalSeparator() error {
738 tok := p.next()
739 if tok.err != nil {
740 return tok.err
741 }
742 if tok.value != ";" && tok.value != "," {
743 p.back()
744 }
745 return nil
746}
747
// readAny parses a single value from the input into v, choosing the syntax
// from v's reflect kind. props supplies the field's tag properties; only
// props.Enum is read here directly (to resolve enum names for int32 fields),
// and props is passed through to recursive calls.
//
// Slices other than []byte are treated as repeated fields: one element is
// appended per call, or several when the list notation [a, b, c] is used.
// Pointers are allocated and then filled through recursion. Structs are
// delegated to readStruct. An unparsable token yields an "invalid TYPE:
// TOKEN" error.
func (p *textParser) readAny(v reflect.Value, props *Properties) error {
	tok := p.next()
	if tok.err != nil {
		return tok.err
	}
	if tok.value == "" {
		// next() reports end of input as an empty token.
		return p.errorf("unexpected EOF")
	}

	switch fv := v; fv.Kind() {
	case reflect.Slice:
		at := v.Type()
		if at.Elem().Kind() == reflect.Uint8 {
			// Special case for []byte
			if tok.value[0] != '"' && tok.value[0] != '\'' {
				// Deliberately written out here, as the error after
				// this switch statement would write "invalid []byte: ...",
				// which is not as user-friendly.
				return p.errorf("invalid string: %v", tok.value)
			}
			bytes := []byte(tok.unquoted)
			fv.Set(reflect.ValueOf(bytes))
			return nil
		}
		// Repeated field.
		if tok.value == "[" {
			// Repeated field with list notation, like [1,2,3].
			for {
				// Append a zero element, then parse directly into it.
				fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
				err := p.readAny(fv.Index(fv.Len()-1), props)
				if err != nil {
					return err
				}
				tok := p.next()
				if tok.err != nil {
					return tok.err
				}
				if tok.value == "]" {
					break
				}
				if tok.value != "," {
					return p.errorf("Expected ']' or ',' found %q", tok.value)
				}
			}
			return nil
		}
		// One value of the repeated field.
		// Push the token back so the recursive call re-reads it as the element.
		p.back()
		fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
		return p.readAny(fv.Index(fv.Len()-1), props)
	case reflect.Bool:
		// true/1/t/True or false/f/0/False.
		switch tok.value {
		case "true", "1", "t", "True":
			fv.SetBool(true)
			return nil
		case "false", "0", "f", "False":
			fv.SetBool(false)
			return nil
		}
	case reflect.Float32, reflect.Float64:
		// Note: this v shadows the parameter only inside this case clause.
		v := tok.value
		// Ignore 'f' for compatibility with output generated by C++, but don't
		// remove 'f' when the value is "-inf" or "inf".
		if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
			v = v[:len(v)-1]
		}
		if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
			fv.SetFloat(f)
			return nil
		}
	case reflect.Int32:
		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
			fv.SetInt(x)
			return nil
		}

		// Not a number: try to resolve the token as an enum value name.
		if len(props.Enum) == 0 {
			break
		}
		m, ok := enumValueMaps[props.Enum]
		if !ok {
			break
		}
		x, ok := m[tok.value]
		if !ok {
			break
		}
		fv.SetInt(int64(x))
		return nil
	case reflect.Int64:
		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
			fv.SetInt(x)
			return nil
		}

	case reflect.Ptr:
		// A basic field (indirected through pointer), or a repeated message/group
		// Push the token back, allocate the pointee and parse into it.
		p.back()
		fv.Set(reflect.New(fv.Type().Elem()))
		return p.readAny(fv.Elem(), props)
	case reflect.String:
		// Only quoted tokens are accepted for string fields.
		if tok.value[0] == '"' || tok.value[0] == '\'' {
			fv.SetString(tok.unquoted)
			return nil
		}
	case reflect.Struct:
		var terminator string
		switch tok.value {
		case "{":
			terminator = "}"
		case "<":
			terminator = ">"
		default:
			return p.errorf("expected '{' or '<', found %q", tok.value)
		}
		// TODO: Handle nested messages which implement encoding.TextUnmarshaler.
		return p.readStruct(fv, terminator)
	case reflect.Uint32:
		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
			fv.SetUint(uint64(x))
			return nil
		}
	case reflect.Uint64:
		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
			fv.SetUint(x)
			return nil
		}
	}
	// Reached when the kind is unsupported or the token did not parse for it.
	return p.errorf("invalid %v: %v", v.Type(), tok.value)
}
879
David Symonds501f7db2013-08-05 13:53:28 +1000880// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
881// before starting to unmarshal, so any existing data in pb is always removed.
David Symonds2a1c6b92014-10-12 16:42:41 +1100882// If a required field is not set and no other error occurs,
883// UnmarshalText returns *RequiredNotSetError.
David Symonds9f60f432012-06-14 09:45:25 +1000884func UnmarshalText(s string, pb Message) error {
David Symonds81177532014-11-20 14:33:40 +1100885 if um, ok := pb.(encoding.TextUnmarshaler); ok {
David Symonds267e8052014-02-19 14:50:51 +1100886 err := um.UnmarshalText([]byte(s))
887 return err
888 }
David Symonds501f7db2013-08-05 13:53:28 +1000889 pb.Reset()
Nigel Tao4ede8452011-04-28 11:27:25 +1000890 v := reflect.ValueOf(pb)
David Symondsa9cda212011-04-15 01:23:17 -0700891 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700892 return pe
893 }
894 return nil
895}