// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// https://github.com/golang/protobuf
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package proto

// Functions for parsing the Text protocol buffer format.
// TODO: message sets.

import (
	"encoding"
	"errors"
	"fmt"
	"reflect"
	"strconv"
	"strings"
	"unicode/utf8"
)

// ParseError describes a problem found while parsing text-format input,
// together with the position at which it was detected.
type ParseError struct {
	Message string
	Line    int // 1-based line number
	Offset  int // 0-based byte offset from start of input
}

// Error implements the error interface. The byte offset is included only for
// errors on the first line; for later lines just the line number is shown.
func (p *ParseError) Error() string {
	if p.Line == 1 {
		// show offset only for first line
		return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
	}
	return fmt.Sprintf("line %d: %v", p.Line, p.Message)
}

61type token struct {
62 value string
63 err *ParseError
64 line int // line number
65 offset int // byte number from start of input, not start of line
66 unquoted string // the unquoted version of value, if it was a quoted string
67}
68
69func (t *token) String() string {
70 if t.err == nil {
71 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
72 }
73 return fmt.Sprintf("parse error: %v", t.err)
74}
75
76type textParser struct {
77 s string // remaining input
78 done bool // whether the parsing is finished (success or error)
79 backed bool // whether back() was called
80 offset, line int
81 cur token
82}
83
84func newTextParser(s string) *textParser {
85 p := new(textParser)
86 p.s = s
87 p.line = 1
88 p.cur.line = 1
89 return p
90}
91
Rob Piked6420b82011-04-13 16:37:04 -070092func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070093 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070094 p.cur.err = pe
95 p.done = true
96 return pe
97}
98
// isIdentOrNumberChar reports whether c may appear in an identifier or number
// token. Numbers and identifiers are matched by [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	switch c {
	case '-', '+', '.', '_':
		return true
	}
	return false
}

// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	switch c {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}

// isQuote reports whether c opens (and closes) a quoted string: a double or
// a single quote.
func isQuote(c byte) bool {
	switch c {
	case '"', '\'':
		return true
	}
	return false
}

Rob Pikeaaa3a622010-03-20 22:32:34 -0700130func (p *textParser) skipWhitespace() {
131 i := 0
132 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
133 if p.s[i] == '#' {
134 // comment; skip to end of line or input
135 for i < len(p.s) && p.s[i] != '\n' {
136 i++
137 }
138 if i == len(p.s) {
139 break
140 }
141 }
142 if p.s[i] == '\n' {
143 p.line++
144 }
145 i++
146 }
147 p.offset += i
148 p.s = p.s[i:len(p.s)]
149 if len(p.s) == 0 {
150 p.done = true
151 }
152}
153
154func (p *textParser) advance() {
155 // Skip whitespace
156 p.skipWhitespace()
157 if p.done {
158 return
159 }
160
161 // Start of non-whitespace
162 p.cur.err = nil
163 p.cur.offset, p.cur.line = p.offset, p.line
164 p.cur.unquoted = ""
165 switch p.s[0] {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100166 case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700167 // Single symbol
168 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
David Symonds162d0032012-06-28 09:44:46 -0700169 case '"', '\'':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700170 // Quoted string
171 i := 1
David Symonds162d0032012-06-28 09:44:46 -0700172 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700173 if p.s[i] == '\\' && i+1 < len(p.s) {
174 // skip escaped char
175 i++
176 }
177 i++
178 }
David Symonds162d0032012-06-28 09:44:46 -0700179 if i >= len(p.s) || p.s[i] != p.s[0] {
Rob Piked6420b82011-04-13 16:37:04 -0700180 p.errorf("unmatched quote")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700181 return
182 }
David Symondsfa94a1e2012-09-24 13:21:49 +1000183 unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700184 if err != nil {
David Symondsbafa7bc2015-07-01 07:59:00 +1000185 p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700186 return
187 }
188 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
189 p.cur.unquoted = unq
190 default:
191 i := 0
192 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
193 i++
194 }
195 if i == 0 {
Rob Piked6420b82011-04-13 16:37:04 -0700196 p.errorf("unexpected byte %#x", p.s[0])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700197 return
198 }
199 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
200 }
201 p.offset += len(p.cur.value)
202}
203
var (
	errBadUTF8 = errors.New("proto: bad UTF-8")
	errBadHex  = errors.New("proto: bad hexadecimal")
)

// unquoteC decodes the body of a quoted text-format string (s excludes the
// surrounding quotes), expanding backslash escapes. quote is the character
// that delimited the string.
//
// Strings without a backslash or quote character are returned as-is without
// being validated as UTF-8; otherwise invalid UTF-8 yields errBadUTF8.
func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is *not* parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
		if r == '\\' || r == quote {
			simple = false
			break
		}
	}
	if simple {
		return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			return "", errBadUTF8
		}
		s = s[n:]
		if r != '\\' {
			if r < utf8.RuneSelf {
				buf = append(buf, byte(r))
			} else {
				buf = append(buf, string(r)...)
			}
			continue
		}

		ch, tail, err := unescape(s)
		if err != nil {
			return "", err
		}
		buf = append(buf, ch...)
		s = tail
	}
	return string(buf), nil
}

// unescape decodes one escape sequence. s is the text immediately after the
// backslash. It returns the decoded bytes as ch and the unconsumed remainder
// of s as tail.
//
// Octal (\NNN) and hex (\xNN) escapes produce a single raw byte. \uXXXX and
// \UXXXXXXXX produce the UTF-8 encoding of the named code point.
func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
		return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
		return "\a", s, nil
	case 'b':
		return "\b", s, nil
	case 'f':
		return "\f", s, nil
	case 'n':
		return "\n", s, nil
	case 'r':
		return "\r", s, nil
	case 't':
		return "\t", s, nil
	case 'v':
		return "\v", s, nil
	case '?':
		return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
		return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
		if len(s) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		base := 8
		ss := s[:2]
		s = s[2:]
		if r == 'x' || r == 'X' {
			base = 16
		} else {
			// For octal the introducing digit is part of the value.
			ss = string(r) + ss
		}
		i, err := strconv.ParseUint(ss, base, 8)
		if err != nil {
			return "", "", err
		}
		return string([]byte{byte(i)}), s, nil
	case 'u', 'U':
		n := 4
		if r == 'U' {
			n = 8
		}
		if len(s) < n {
			return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
		}

		// Accumulate in a uint32 so that eight hex digits cannot overflow.
		var cp uint32
		for i := 0; i < n; i++ {
			d, ok := unhex(s[i])
			if !ok {
				return "", "", errBadHex
			}
			cp = cp<<4 | uint32(d)
		}
		if cp > utf8.MaxRune {
			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, s[:n])
		}
		// The escape names a code point, so emit its UTF-8 encoding rather
		// than the raw hex bytes.
		return string(rune(cp)), s[n:], nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
}

// unhex returns the value of the hexadecimal digit b; ok is false if b is not
// a hex digit.
// Adapted from src/pkg/strconv/quote.go.
func unhex(b byte) (v byte, ok bool) {
	switch {
	case '0' <= b && b <= '9':
		return b - '0', true
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10, true
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10, true
	}
	return 0, false
}

Rob Pikeaaa3a622010-03-20 22:32:34 -0700331// Back off the parser by one token. Can only be done between calls to next().
332// It makes the next advance() a no-op.
333func (p *textParser) back() { p.backed = true }
334
335// Advances the parser and returns the new current token.
336func (p *textParser) next() *token {
337 if p.backed || p.done {
338 p.backed = false
339 return &p.cur
340 }
341 p.advance()
342 if p.done {
343 p.cur.value = ""
Daniel Kraftb9827042016-02-02 18:33:00 +1100344 } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700345 // Look for multiple quoted strings separated by whitespace,
346 // and concatenate them.
347 cat := p.cur
348 for {
349 p.skipWhitespace()
Daniel Kraftb9827042016-02-02 18:33:00 +1100350 if p.done || !isQuote(p.s[0]) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700351 break
352 }
353 p.advance()
354 if p.cur.err != nil {
355 return &p.cur
356 }
357 cat.value += " " + p.cur.value
358 cat.unquoted += p.cur.unquoted
359 }
360 p.done = false // parser may have seen EOF, but we want to return cat
361 p.cur = cat
362 }
363 return &p.cur
364}
365
David Symonds3ea3e052014-12-22 16:15:28 +1100366func (p *textParser) consumeToken(s string) error {
367 tok := p.next()
368 if tok.err != nil {
369 return tok.err
370 }
371 if tok.value != s {
372 p.back()
373 return p.errorf("expected %q, found %q", s, tok.value)
374 }
375 return nil
376}
377
David Symonds2a1c6b92014-10-12 16:42:41 +1100378// Return a RequiredNotSetError indicating which required field was not set.
379func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
Rob Pike97e934d2011-04-11 12:52:49 -0700380 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700381 sprops := GetProperties(st)
382 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700383 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700384 continue
385 }
386
387 props := sprops.Prop[i]
388 if props.Required {
David Symonds2a1c6b92014-10-12 16:42:41 +1100389 return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
Rob Pikeaaa3a622010-03-20 22:32:34 -0700390 }
391 }
David Symonds2a1c6b92014-10-12 16:42:41 +1100392 return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
Rob Pikeaaa3a622010-03-20 22:32:34 -0700393}
394
395// Returns the index in the struct for the named field, as well as the parsed tag properties.
David Symonds59b73b32015-08-24 13:22:02 +1000396func structFieldByName(sprops *StructProperties, name string) (int, *Properties, bool) {
David Symonds2bba1b22012-09-26 14:53:08 +1000397 i, ok := sprops.decoderOrigNames[name]
David Symonds79eae332010-10-16 11:33:20 +1100398 if ok {
399 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700400 }
401 return -1, nil, false
402}
403
David Symonds54531052011-12-08 12:00:31 +1100404// Consume a ':' from the input stream (if the next token is a colon),
405// returning an error if a colon is needed but not present.
406func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
407 tok := p.next()
408 if tok.err != nil {
409 return tok.err
410 }
411 if tok.value != ":" {
412 // Colon is optional when the field is a group or message.
413 needColon := true
414 switch props.Wire {
415 case "group":
416 needColon = false
417 case "bytes":
418 // A "bytes" field is either a message, a string, or a repeated field;
419 // those three become *T, *string and []T respectively, so we can check for
420 // this field being a pointer to a non-string.
421 if typ.Kind() == reflect.Ptr {
422 // *T or *string
423 if typ.Elem().Kind() == reflect.String {
424 break
425 }
426 } else if typ.Kind() == reflect.Slice {
427 // []T or []*T
428 if typ.Elem().Kind() != reflect.Ptr {
429 break
430 }
David Symondsabd3b412014-11-28 11:43:44 +1100431 } else if typ.Kind() == reflect.String {
432 // The proto3 exception is for a string field,
433 // which requires a colon.
434 break
David Symonds54531052011-12-08 12:00:31 +1100435 }
436 needColon = false
437 }
438 if needColon {
439 return p.errorf("expected ':', found %q", tok.value)
440 }
441 p.back()
442 }
443 return nil
444}
445
David Symonds2a1c6b92014-10-12 16:42:41 +1100446func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
Rob Pike97e934d2011-04-11 12:52:49 -0700447 st := sv.Type()
David Symonds59b73b32015-08-24 13:22:02 +1000448 sprops := GetProperties(st)
449 reqCount := sprops.reqCount
David Symonds2a1c6b92014-10-12 16:42:41 +1100450 var reqFieldErr error
David Symonds8a099d02014-10-30 12:40:51 +1100451 fieldSet := make(map[string]bool)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700452 // A struct is a sequence of "name: value", terminated by one of
David Symonds54531052011-12-08 12:00:31 +1100453 // '>' or '}', or the end of the input. A name may also be
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100454 // "[extension]" or "[type/url]".
455 //
456 // The whole struct can also be an expanded Any message, like:
457 // [type/url] < ... struct contents ... >
Rob Pikeaaa3a622010-03-20 22:32:34 -0700458 for {
459 tok := p.next()
460 if tok.err != nil {
461 return tok.err
462 }
463 if tok.value == terminator {
464 break
465 }
David Symonds54531052011-12-08 12:00:31 +1100466 if tok.value == "[" {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100467 // Looks like an extension or an Any.
David Symonds54531052011-12-08 12:00:31 +1100468 //
469 // TODO: Check whether we need to handle
470 // namespace rooted names (e.g. ".something.Foo").
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100471 extName, err := p.consumeExtName()
472 if err != nil {
473 return err
David Symonds54531052011-12-08 12:00:31 +1100474 }
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100475
476 if s := strings.LastIndex(extName, "/"); s >= 0 {
477 // If it contains a slash, it's an Any type URL.
478 messageName := extName[s+1:]
479 mt := MessageType(messageName)
480 if mt == nil {
481 return p.errorf("unrecognized message %q in google.protobuf.Any", messageName)
482 }
483 tok = p.next()
484 if tok.err != nil {
485 return tok.err
486 }
487 // consume an optional colon
488 if tok.value == ":" {
489 tok = p.next()
490 if tok.err != nil {
491 return tok.err
492 }
493 }
494 var terminator string
495 switch tok.value {
496 case "<":
497 terminator = ">"
498 case "{":
499 terminator = "}"
500 default:
501 return p.errorf("expected '{' or '<', found %q", tok.value)
502 }
503 v := reflect.New(mt.Elem())
504 if pe := p.readStruct(v.Elem(), terminator); pe != nil {
505 return pe
506 }
507 b, err := Marshal(v.Interface().(Message))
508 if err != nil {
509 return p.errorf("failed to marshal message of type %q: %v", messageName, err)
510 }
511 sv.FieldByName("TypeUrl").SetString(extName)
512 sv.FieldByName("Value").SetBytes(b)
513 continue
514 }
515
David Symonds54531052011-12-08 12:00:31 +1100516 var desc *ExtensionDesc
517 // This could be faster, but it's functional.
518 // TODO: Do something smarter than a linear scan.
David Symonds9f60f432012-06-14 09:45:25 +1000519 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100520 if d.Name == extName {
David Symonds54531052011-12-08 12:00:31 +1100521 desc = d
522 break
Rob Pikeaaa3a622010-03-20 22:32:34 -0700523 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700524 }
David Symonds54531052011-12-08 12:00:31 +1100525 if desc == nil {
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100526 return p.errorf("unrecognized extension %q", extName)
David Symonds54531052011-12-08 12:00:31 +1100527 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700528
David Symonds54531052011-12-08 12:00:31 +1100529 props := &Properties{}
530 props.Parse(desc.Tag)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700531
David Symonds54531052011-12-08 12:00:31 +1100532 typ := reflect.TypeOf(desc.ExtensionType)
533 if err := p.checkForColon(props, typ); err != nil {
534 return err
535 }
536
David Symonds61826da2012-05-05 09:31:28 +1000537 rep := desc.repeated()
538
David Symonds54531052011-12-08 12:00:31 +1100539 // Read the extension structure, and set it in
540 // the value we're constructing.
David Symonds61826da2012-05-05 09:31:28 +1000541 var ext reflect.Value
542 if !rep {
543 ext = reflect.New(typ).Elem()
544 } else {
545 ext = reflect.New(typ.Elem()).Elem()
546 }
David Symonds54531052011-12-08 12:00:31 +1100547 if err := p.readAny(ext, props); err != nil {
David Symonds2a1c6b92014-10-12 16:42:41 +1100548 if _, ok := err.(*RequiredNotSetError); !ok {
549 return err
550 }
551 reqFieldErr = err
David Symonds54531052011-12-08 12:00:31 +1100552 }
matloob@google.come51d0022016-05-23 09:09:04 -0400553 ep := sv.Addr().Interface().(Message)
David Symonds61826da2012-05-05 09:31:28 +1000554 if !rep {
555 SetExtension(ep, desc, ext.Interface())
556 } else {
557 old, err := GetExtension(ep, desc)
558 var sl reflect.Value
559 if err == nil {
560 sl = reflect.ValueOf(old) // existing slice
561 } else {
562 sl = reflect.MakeSlice(typ, 0, 1)
563 }
564 sl = reflect.Append(sl, ext)
565 SetExtension(ep, desc, sl.Interface())
566 }
David Symonds59b73b32015-08-24 13:22:02 +1000567 if err := p.consumeOptionalSeparator(); err != nil {
568 return err
569 }
570 continue
571 }
572
573 // This is a normal, non-extension field.
574 name := tok.value
575 var dst reflect.Value
576 fi, props, ok := structFieldByName(sprops, name)
577 if ok {
578 dst = sv.Field(fi)
David Symonds1baed092015-08-25 15:42:00 +1000579 } else if oop, ok := sprops.OneofTypes[name]; ok {
580 // It is a oneof.
581 props = oop.Prop
582 nv := reflect.New(oop.Type.Elem())
583 dst = nv.Elem().Field(0)
584 sv.Field(oop.Field).Set(nv)
David Symonds59b73b32015-08-24 13:22:02 +1000585 }
586 if !dst.IsValid() {
587 return p.errorf("unknown field name %q in %v", name, st)
588 }
David Symonds54531052011-12-08 12:00:31 +1100589
David Symonds59b73b32015-08-24 13:22:02 +1000590 if dst.Kind() == reflect.Map {
591 // Consume any colon.
592 if err := p.checkForColon(props, dst.Type()); err != nil {
David Symonds54531052011-12-08 12:00:31 +1100593 return err
594 }
595
David Symonds59b73b32015-08-24 13:22:02 +1000596 // Construct the map if it doesn't already exist.
597 if dst.IsNil() {
598 dst.Set(reflect.MakeMap(dst.Type()))
David Symonds54531052011-12-08 12:00:31 +1100599 }
David Symonds59b73b32015-08-24 13:22:02 +1000600 key := reflect.New(dst.Type().Key()).Elem()
601 val := reflect.New(dst.Type().Elem()).Elem()
602
603 // The map entry should be this sequence of tokens:
604 // < key : KEY value : VALUE >
Ross Light11114612016-05-25 19:11:34 -0400605 // However, implementations may omit key or value, and technically
606 // we should support them in any order. See b/28924776 for a time
607 // this went wrong.
David Symonds59b73b32015-08-24 13:22:02 +1000608
609 tok := p.next()
610 var terminator string
611 switch tok.value {
612 case "<":
613 terminator = ">"
614 case "{":
615 terminator = "}"
616 default:
617 return p.errorf("expected '{' or '<', found %q", tok.value)
618 }
Ross Light11114612016-05-25 19:11:34 -0400619 for {
620 tok := p.next()
621 if tok.err != nil {
622 return tok.err
623 }
624 if tok.value == terminator {
625 break
626 }
627 switch tok.value {
628 case "key":
629 if err := p.consumeToken(":"); err != nil {
630 return err
631 }
632 if err := p.readAny(key, props.mkeyprop); err != nil {
633 return err
634 }
635 if err := p.consumeOptionalSeparator(); err != nil {
636 return err
637 }
638 case "value":
639 if err := p.checkForColon(props.mvalprop, dst.Type().Elem()); err != nil {
640 return err
641 }
642 if err := p.readAny(val, props.mvalprop); err != nil {
643 return err
644 }
645 if err := p.consumeOptionalSeparator(); err != nil {
646 return err
647 }
648 default:
649 p.back()
650 return p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
651 }
David Symonds59b73b32015-08-24 13:22:02 +1000652 }
653
654 dst.SetMapIndex(key, val)
655 continue
656 }
657
658 // Check that it's not already set if it's not a repeated field.
659 if !props.Repeated && fieldSet[name] {
660 return p.errorf("non-repeated field %q was repeated", name)
661 }
662
663 if err := p.checkForColon(props, dst.Type()); err != nil {
664 return err
665 }
666
667 // Parse into the field.
668 fieldSet[name] = true
669 if err := p.readAny(dst, props); err != nil {
670 if _, ok := err.(*RequiredNotSetError); !ok {
671 return err
672 }
673 reqFieldErr = err
Bryan Mills78550bb2016-04-01 08:55:00 +1100674 }
675 if props.Required {
David Symonds59b73b32015-08-24 13:22:02 +1000676 reqCount--
Rob Pikeaaa3a622010-03-20 22:32:34 -0700677 }
David Symondsbe02a4a2012-12-06 15:20:41 +1100678
David Symonds056d5ce2015-05-12 19:27:00 +1000679 if err := p.consumeOptionalSeparator(); err != nil {
680 return err
David Symondsbe02a4a2012-12-06 15:20:41 +1100681 }
David Symonds056d5ce2015-05-12 19:27:00 +1000682
Rob Pikeaaa3a622010-03-20 22:32:34 -0700683 }
684
685 if reqCount > 0 {
686 return p.missingRequiredFieldError(sv)
687 }
David Symonds2a1c6b92014-10-12 16:42:41 +1100688 return reqFieldErr
Rob Pikeaaa3a622010-03-20 22:32:34 -0700689}
690
Lev Shamardin1cc4d6f2016-03-04 03:50:00 +1100691// consumeExtName consumes extension name or expanded Any type URL and the
692// following ']'. It returns the name or URL consumed.
693func (p *textParser) consumeExtName() (string, error) {
694 tok := p.next()
695 if tok.err != nil {
696 return "", tok.err
697 }
698
699 // If extension name or type url is quoted, it's a single token.
700 if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
701 name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
702 if err != nil {
703 return "", err
704 }
705 return name, p.consumeToken("]")
706 }
707
708 // Consume everything up to "]"
709 var parts []string
710 for tok.value != "]" {
711 parts = append(parts, tok.value)
712 tok = p.next()
713 if tok.err != nil {
714 return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
715 }
716 }
717 return strings.Join(parts, ""), nil
718}
719
David Symonds056d5ce2015-05-12 19:27:00 +1000720// consumeOptionalSeparator consumes an optional semicolon or comma.
721// It is used in readStruct to provide backward compatibility.
722func (p *textParser) consumeOptionalSeparator() error {
723 tok := p.next()
724 if tok.err != nil {
725 return tok.err
726 }
727 if tok.value != ";" && tok.value != "," {
728 p.back()
729 }
730 return nil
731}
732
David Symonds2a1c6b92014-10-12 16:42:41 +1100733func (p *textParser) readAny(v reflect.Value, props *Properties) error {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700734 tok := p.next()
735 if tok.err != nil {
736 return tok.err
737 }
738 if tok.value == "" {
Rob Piked6420b82011-04-13 16:37:04 -0700739 return p.errorf("unexpected EOF")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700740 }
741
Rob Pike97e934d2011-04-11 12:52:49 -0700742 switch fv := v; fv.Kind() {
743 case reflect.Slice:
744 at := v.Type()
Rob Pikeab5b8022010-06-21 17:47:58 -0700745 if at.Elem().Kind() == reflect.Uint8 {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700746 // Special case for []byte
David Symonds162d0032012-06-28 09:44:46 -0700747 if tok.value[0] != '"' && tok.value[0] != '\'' {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700748 // Deliberately written out here, as the error after
749 // this switch statement would write "invalid []byte: ...",
750 // which is not as user-friendly.
Rob Piked6420b82011-04-13 16:37:04 -0700751 return p.errorf("invalid string: %v", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700752 }
753 bytes := []byte(tok.unquoted)
Nigel Tao4ede8452011-04-28 11:27:25 +1000754 fv.Set(reflect.ValueOf(bytes))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700755 return nil
756 }
Lorenzo Simionatodeb4a5e2015-10-09 07:39:00 +1100757 // Repeated field.
758 if tok.value == "[" {
759 // Repeated field with list notation, like [1,2,3].
760 for {
761 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
762 err := p.readAny(fv.Index(fv.Len()-1), props)
763 if err != nil {
764 return err
765 }
766 tok := p.next()
767 if tok.err != nil {
768 return tok.err
769 }
770 if tok.value == "]" {
771 break
772 }
773 if tok.value != "," {
774 return p.errorf("Expected ']' or ',' found %q", tok.value)
775 }
776 }
777 return nil
David Symonds79eae332010-10-16 11:33:20 +1100778 }
Lorenzo Simionatodeb4a5e2015-10-09 07:39:00 +1100779 // One value of the repeated field.
Rob Pikeaaa3a622010-03-20 22:32:34 -0700780 p.back()
Lorenzo Simionatodeb4a5e2015-10-09 07:39:00 +1100781 fv.Set(reflect.Append(fv, reflect.New(at.Elem()).Elem()))
782 return p.readAny(fv.Index(fv.Len()-1), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700783 case reflect.Bool:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700784 // Either "true", "false", 1 or 0.
785 switch tok.value {
786 case "true", "1":
Rob Pike97e934d2011-04-11 12:52:49 -0700787 fv.SetBool(true)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700788 return nil
789 case "false", "0":
Rob Pike97e934d2011-04-11 12:52:49 -0700790 fv.SetBool(false)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700791 return nil
792 }
Rob Pike97e934d2011-04-11 12:52:49 -0700793 case reflect.Float32, reflect.Float64:
David Symonds6bd081e2012-06-28 10:46:25 -0700794 v := tok.value
David Symondsbe02a4a2012-12-06 15:20:41 +1100795 // Ignore 'f' for compatibility with output generated by C++, but don't
796 // remove 'f' when the value is "-inf" or "inf".
797 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
David Symonds6bd081e2012-06-28 10:46:25 -0700798 v = v[:len(v)-1]
799 }
800 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
Rob Pike97e934d2011-04-11 12:52:49 -0700801 fv.SetFloat(f)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700802 return nil
803 }
Rob Pike19b2dbb2011-04-11 16:49:15 -0700804 case reflect.Int32:
David Symonds32612dd2012-06-15 07:59:05 -0700805 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700806 fv.SetInt(x)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700807 return nil
Rob Pike19b2dbb2011-04-11 16:49:15 -0700808 }
David Symonds8bb628d2014-07-22 13:49:35 +1000809
Rob Pike19b2dbb2011-04-11 16:49:15 -0700810 if len(props.Enum) == 0 {
811 break
812 }
813 m, ok := enumValueMaps[props.Enum]
814 if !ok {
815 break
816 }
817 x, ok := m[tok.value]
818 if !ok {
819 break
820 }
821 fv.SetInt(int64(x))
822 return nil
823 case reflect.Int64:
David Symonds32612dd2012-06-15 07:59:05 -0700824 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700825 fv.SetInt(x)
826 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700827 }
David Symonds8bb628d2014-07-22 13:49:35 +1000828
Rob Pike97e934d2011-04-11 12:52:49 -0700829 case reflect.Ptr:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700830 // A basic field (indirected through pointer), or a repeated message/group
831 p.back()
Rob Pikeccd260c2011-04-18 13:13:04 -0700832 fv.Set(reflect.New(fv.Type().Elem()))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700833 return p.readAny(fv.Elem(), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700834 case reflect.String:
David Symonds162d0032012-06-28 09:44:46 -0700835 if tok.value[0] == '"' || tok.value[0] == '\'' {
Rob Pike97e934d2011-04-11 12:52:49 -0700836 fv.SetString(tok.unquoted)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700837 return nil
838 }
Rob Pike97e934d2011-04-11 12:52:49 -0700839 case reflect.Struct:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700840 var terminator string
841 switch tok.value {
842 case "{":
843 terminator = "}"
844 case "<":
845 terminator = ">"
846 default:
Rob Piked6420b82011-04-13 16:37:04 -0700847 return p.errorf("expected '{' or '<', found %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700848 }
David Symonds81177532014-11-20 14:33:40 +1100849 // TODO: Handle nested messages which implement encoding.TextUnmarshaler.
Rob Pikeaaa3a622010-03-20 22:32:34 -0700850 return p.readStruct(fv, terminator)
Rob Pike19b2dbb2011-04-11 16:49:15 -0700851 case reflect.Uint32:
David Symonds32612dd2012-06-15 07:59:05 -0700852 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700853 fv.SetUint(uint64(x))
854 return nil
855 }
856 case reflect.Uint64:
David Symonds32612dd2012-06-15 07:59:05 -0700857 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700858 fv.SetUint(x)
859 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700860 }
861 }
Rob Piked6420b82011-04-13 16:37:04 -0700862 return p.errorf("invalid %v: %v", v.Type(), tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700863}
864
David Symonds501f7db2013-08-05 13:53:28 +1000865// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
866// before starting to unmarshal, so any existing data in pb is always removed.
David Symonds2a1c6b92014-10-12 16:42:41 +1100867// If a required field is not set and no other error occurs,
868// UnmarshalText returns *RequiredNotSetError.
David Symonds9f60f432012-06-14 09:45:25 +1000869func UnmarshalText(s string, pb Message) error {
David Symonds81177532014-11-20 14:33:40 +1100870 if um, ok := pb.(encoding.TextUnmarshaler); ok {
David Symonds267e8052014-02-19 14:50:51 +1100871 err := um.UnmarshalText([]byte(s))
872 return err
873 }
David Symonds501f7db2013-08-05 13:53:28 +1000874 pb.Reset()
Nigel Tao4ede8452011-04-28 11:27:25 +1000875 v := reflect.ValueOf(pb)
David Symondsa9cda212011-04-15 01:23:17 -0700876 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700877 return pe
878 }
879 return nil
880}