blob: dc477c97e897e15265c508cf44facf7435887164 [file] [log] [blame]
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 The Go Authors. All rights reserved.
// http://code.google.com/p/goprotobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
// Functions for parsing the Text protocol buffer format.
// TODO: message sets.
Rob Pikeaaa3a622010-03-20 22:32:34 -070036
37import (
David Symonds81177532014-11-20 14:33:40 +110038 "encoding"
David Symondsfa94a1e2012-09-24 13:21:49 +100039 "errors"
Rob Pikeaaa3a622010-03-20 22:32:34 -070040 "fmt"
Rob Pikeaaa3a622010-03-20 22:32:34 -070041 "reflect"
42 "strconv"
David Symonds183124e2012-03-23 13:20:23 +110043 "strings"
David Symondsfa94a1e2012-09-24 13:21:49 +100044 "unicode/utf8"
Rob Pikeaaa3a622010-03-20 22:32:34 -070045)
46
// ParseError describes a problem found while parsing text-format input.
type ParseError struct {
	Message string
	Line    int // 1-based line number
	Offset  int // 0-based byte offset from start of input
}

// Error implements the error interface. The byte offset is reported only
// for errors on the first line; for any other line just the line number
// is shown.
func (p *ParseError) Error() string {
	if p.Line == 1 {
		// show offset only for first line
		return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
	}
	return fmt.Sprintf("line %d: %v", p.Line, p.Message)
}
60
61type token struct {
62 value string
63 err *ParseError
64 line int // line number
65 offset int // byte number from start of input, not start of line
66 unquoted string // the unquoted version of value, if it was a quoted string
67}
68
69func (t *token) String() string {
70 if t.err == nil {
71 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
72 }
73 return fmt.Sprintf("parse error: %v", t.err)
74}
75
76type textParser struct {
77 s string // remaining input
78 done bool // whether the parsing is finished (success or error)
79 backed bool // whether back() was called
80 offset, line int
81 cur token
82}
83
84func newTextParser(s string) *textParser {
85 p := new(textParser)
86 p.s = s
87 p.line = 1
88 p.cur.line = 1
89 return p
90}
91
Rob Piked6420b82011-04-13 16:37:04 -070092func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070093 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070094 p.cur.err = pe
95 p.done = true
96 return pe
97}
98
// isIdentOrNumberChar reports whether c may appear in an identifier or a
// number. Numbers and identifiers are matched by [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
		return true
	case '0' <= c && c <= '9':
		return true
	case c == '-', c == '+', c == '.', c == '_':
		return true
	}
	return false
}
113
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
121
122func (p *textParser) skipWhitespace() {
123 i := 0
124 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
125 if p.s[i] == '#' {
126 // comment; skip to end of line or input
127 for i < len(p.s) && p.s[i] != '\n' {
128 i++
129 }
130 if i == len(p.s) {
131 break
132 }
133 }
134 if p.s[i] == '\n' {
135 p.line++
136 }
137 i++
138 }
139 p.offset += i
140 p.s = p.s[i:len(p.s)]
141 if len(p.s) == 0 {
142 p.done = true
143 }
144}
145
146func (p *textParser) advance() {
147 // Skip whitespace
148 p.skipWhitespace()
149 if p.done {
150 return
151 }
152
153 // Start of non-whitespace
154 p.cur.err = nil
155 p.cur.offset, p.cur.line = p.offset, p.line
156 p.cur.unquoted = ""
157 switch p.s[0] {
David Symondsbe02a4a2012-12-06 15:20:41 +1100158 case '<', '>', '{', '}', ':', '[', ']', ';', ',':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700159 // Single symbol
160 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
David Symonds162d0032012-06-28 09:44:46 -0700161 case '"', '\'':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700162 // Quoted string
163 i := 1
David Symonds162d0032012-06-28 09:44:46 -0700164 for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700165 if p.s[i] == '\\' && i+1 < len(p.s) {
166 // skip escaped char
167 i++
168 }
169 i++
170 }
David Symonds162d0032012-06-28 09:44:46 -0700171 if i >= len(p.s) || p.s[i] != p.s[0] {
Rob Piked6420b82011-04-13 16:37:04 -0700172 p.errorf("unmatched quote")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700173 return
174 }
David Symondsfa94a1e2012-09-24 13:21:49 +1000175 unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700176 if err != nil {
Rob Piked6420b82011-04-13 16:37:04 -0700177 p.errorf("invalid quoted string %v", p.s[0:i+1])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700178 return
179 }
180 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
181 p.cur.unquoted = unq
182 default:
183 i := 0
184 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
185 i++
186 }
187 if i == 0 {
Rob Piked6420b82011-04-13 16:37:04 -0700188 p.errorf("unexpected byte %#x", p.s[0])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700189 return
190 }
191 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
192 }
193 p.offset += len(p.cur.value)
194}
195
var (
	errBadUTF8 = errors.New("proto: bad UTF-8")
	errBadHex  = errors.New("proto: bad hexadecimal")
)

// unquoteC returns the contents of a quoted string with its escape
// sequences expanded. s is the text between the quotes and quote is the
// delimiter that surrounded it.
//
// It returns errBadUTF8 if invalid UTF-8 is met while expanding, and
// otherwise any error reported by unescape.
func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is *not* parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
		if r == '\\' || r == quote {
			simple = false
			break
		}
	}
	if simple {
		return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
		r, n := utf8.DecodeRuneInString(s)
		if r == utf8.RuneError && n == 1 {
			return "", errBadUTF8
		}
		s = s[n:]
		if r != '\\' {
			if r < utf8.RuneSelf {
				buf = append(buf, byte(r))
			} else {
				buf = append(buf, string(r)...)
			}
			continue
		}

		ch, tail, err := unescape(s)
		if err != nil {
			return "", err
		}
		buf = append(buf, ch...)
		s = tail
	}
	return string(buf), nil
}

// unescape expands the single escape sequence at the start of s; the
// leading backslash must already have been consumed. It returns the
// expansion in ch and the unconsumed rest of s in tail.
//
// Octal (\NNN) and hex (\xNN) escapes yield one raw byte. \uXXXX and
// \UXXXXXXXX name a Unicode code point and yield its UTF-8 encoding.
func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
		return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
		return "\a", s, nil
	case 'b':
		return "\b", s, nil
	case 'f':
		return "\f", s, nil
	case 'n':
		return "\n", s, nil
	case 'r':
		return "\r", s, nil
	case 't':
		return "\t", s, nil
	case 'v':
		return "\v", s, nil
	case '?':
		return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
		return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
		if len(s) < 2 {
			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
		}
		base := 8
		ss := s[:2]
		s = s[2:]
		if r == 'x' || r == 'X' {
			base = 16
		} else {
			// The octal form is three digits; the first one is r itself.
			ss = string(r) + ss
		}
		i, err := strconv.ParseUint(ss, base, 8)
		if err != nil {
			return "", "", err
		}
		return string([]byte{byte(i)}), s, nil
	case 'u', 'U':
		digits := 4
		if r == 'U' {
			digits = 8
		}
		if len(s) < digits {
			return "", "", fmt.Errorf(`\%c requires %d digits`, r, digits)
		}

		// Accumulate the code point; eight hex digits fit in a uint32.
		var cp uint32
		for i := 0; i < digits; i++ {
			d, ok := unhex(s[i])
			if !ok {
				return "", "", errBadHex
			}
			cp = cp<<4 | uint32(d)
		}
		if cp > utf8.MaxRune {
			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, s[:digits])
		}
		// Emit the UTF-8 encoding of the code point, not the raw bytes
		// spelled by the hex digits.
		return string(rune(cp)), s[digits:], nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
}

// unhex returns the numeric value of the hexadecimal digit b; ok is false
// if b is not a hex digit.
// Adapted from src/pkg/strconv/quote.go.
func unhex(b byte) (v byte, ok bool) {
	switch {
	case '0' <= b && b <= '9':
		return b - '0', true
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10, true
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10, true
	}
	return 0, false
}
322
Rob Pikeaaa3a622010-03-20 22:32:34 -0700323// Back off the parser by one token. Can only be done between calls to next().
324// It makes the next advance() a no-op.
325func (p *textParser) back() { p.backed = true }
326
327// Advances the parser and returns the new current token.
328func (p *textParser) next() *token {
329 if p.backed || p.done {
330 p.backed = false
331 return &p.cur
332 }
333 p.advance()
334 if p.done {
335 p.cur.value = ""
336 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
337 // Look for multiple quoted strings separated by whitespace,
338 // and concatenate them.
339 cat := p.cur
340 for {
341 p.skipWhitespace()
342 if p.done || p.s[0] != '"' {
343 break
344 }
345 p.advance()
346 if p.cur.err != nil {
347 return &p.cur
348 }
349 cat.value += " " + p.cur.value
350 cat.unquoted += p.cur.unquoted
351 }
352 p.done = false // parser may have seen EOF, but we want to return cat
353 p.cur = cat
354 }
355 return &p.cur
356}
357
David Symonds2a1c6b92014-10-12 16:42:41 +1100358// Return a RequiredNotSetError indicating which required field was not set.
359func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
Rob Pike97e934d2011-04-11 12:52:49 -0700360 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700361 sprops := GetProperties(st)
362 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700363 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700364 continue
365 }
366
367 props := sprops.Prop[i]
368 if props.Required {
David Symonds2a1c6b92014-10-12 16:42:41 +1100369 return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
Rob Pikeaaa3a622010-03-20 22:32:34 -0700370 }
371 }
David Symonds2a1c6b92014-10-12 16:42:41 +1100372 return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
Rob Pikeaaa3a622010-03-20 22:32:34 -0700373}
374
375// Returns the index in the struct for the named field, as well as the parsed tag properties.
Rob Pike97e934d2011-04-11 12:52:49 -0700376func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700377 sprops := GetProperties(st)
David Symonds2bba1b22012-09-26 14:53:08 +1000378 i, ok := sprops.decoderOrigNames[name]
David Symonds79eae332010-10-16 11:33:20 +1100379 if ok {
380 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700381 }
382 return -1, nil, false
383}
384
David Symonds54531052011-12-08 12:00:31 +1100385// Consume a ':' from the input stream (if the next token is a colon),
386// returning an error if a colon is needed but not present.
387func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
388 tok := p.next()
389 if tok.err != nil {
390 return tok.err
391 }
392 if tok.value != ":" {
393 // Colon is optional when the field is a group or message.
394 needColon := true
395 switch props.Wire {
396 case "group":
397 needColon = false
398 case "bytes":
399 // A "bytes" field is either a message, a string, or a repeated field;
400 // those three become *T, *string and []T respectively, so we can check for
401 // this field being a pointer to a non-string.
402 if typ.Kind() == reflect.Ptr {
403 // *T or *string
404 if typ.Elem().Kind() == reflect.String {
405 break
406 }
407 } else if typ.Kind() == reflect.Slice {
408 // []T or []*T
409 if typ.Elem().Kind() != reflect.Ptr {
410 break
411 }
412 }
413 needColon = false
414 }
415 if needColon {
416 return p.errorf("expected ':', found %q", tok.value)
417 }
418 p.back()
419 }
420 return nil
421}
422
David Symonds2a1c6b92014-10-12 16:42:41 +1100423func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
Rob Pike97e934d2011-04-11 12:52:49 -0700424 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700425 reqCount := GetProperties(st).reqCount
David Symonds2a1c6b92014-10-12 16:42:41 +1100426 var reqFieldErr error
David Symonds8a099d02014-10-30 12:40:51 +1100427 fieldSet := make(map[string]bool)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700428 // A struct is a sequence of "name: value", terminated by one of
David Symonds54531052011-12-08 12:00:31 +1100429 // '>' or '}', or the end of the input. A name may also be
430 // "[extension]".
Rob Pikeaaa3a622010-03-20 22:32:34 -0700431 for {
432 tok := p.next()
433 if tok.err != nil {
434 return tok.err
435 }
436 if tok.value == terminator {
437 break
438 }
David Symonds54531052011-12-08 12:00:31 +1100439 if tok.value == "[" {
440 // Looks like an extension.
441 //
442 // TODO: Check whether we need to handle
443 // namespace rooted names (e.g. ".something.Foo").
444 tok = p.next()
445 if tok.err != nil {
446 return tok.err
447 }
448 var desc *ExtensionDesc
449 // This could be faster, but it's functional.
450 // TODO: Do something smarter than a linear scan.
David Symonds9f60f432012-06-14 09:45:25 +1000451 for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
David Symonds54531052011-12-08 12:00:31 +1100452 if d.Name == tok.value {
453 desc = d
454 break
Rob Pikeaaa3a622010-03-20 22:32:34 -0700455 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700456 }
David Symonds54531052011-12-08 12:00:31 +1100457 if desc == nil {
458 return p.errorf("unrecognized extension %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700459 }
David Symonds54531052011-12-08 12:00:31 +1100460 // Check the extension terminator.
461 tok = p.next()
462 if tok.err != nil {
463 return tok.err
464 }
465 if tok.value != "]" {
466 return p.errorf("unrecognized extension terminator %q", tok.value)
467 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700468
David Symonds54531052011-12-08 12:00:31 +1100469 props := &Properties{}
470 props.Parse(desc.Tag)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700471
David Symonds54531052011-12-08 12:00:31 +1100472 typ := reflect.TypeOf(desc.ExtensionType)
473 if err := p.checkForColon(props, typ); err != nil {
474 return err
475 }
476
David Symonds61826da2012-05-05 09:31:28 +1000477 rep := desc.repeated()
478
David Symonds54531052011-12-08 12:00:31 +1100479 // Read the extension structure, and set it in
480 // the value we're constructing.
David Symonds61826da2012-05-05 09:31:28 +1000481 var ext reflect.Value
482 if !rep {
483 ext = reflect.New(typ).Elem()
484 } else {
485 ext = reflect.New(typ.Elem()).Elem()
486 }
David Symonds54531052011-12-08 12:00:31 +1100487 if err := p.readAny(ext, props); err != nil {
David Symonds2a1c6b92014-10-12 16:42:41 +1100488 if _, ok := err.(*RequiredNotSetError); !ok {
489 return err
490 }
491 reqFieldErr = err
David Symonds54531052011-12-08 12:00:31 +1100492 }
David Symonds61826da2012-05-05 09:31:28 +1000493 ep := sv.Addr().Interface().(extendableProto)
494 if !rep {
495 SetExtension(ep, desc, ext.Interface())
496 } else {
497 old, err := GetExtension(ep, desc)
498 var sl reflect.Value
499 if err == nil {
500 sl = reflect.ValueOf(old) // existing slice
501 } else {
502 sl = reflect.MakeSlice(typ, 0, 1)
503 }
504 sl = reflect.Append(sl, ext)
505 SetExtension(ep, desc, sl.Interface())
506 }
David Symonds54531052011-12-08 12:00:31 +1100507 } else {
508 // This is a normal, non-extension field.
David Symonds8a099d02014-10-30 12:40:51 +1100509 name := tok.value
510 fi, props, ok := structFieldByName(st, name)
David Symonds54531052011-12-08 12:00:31 +1100511 if !ok {
David Symonds8a099d02014-10-30 12:40:51 +1100512 return p.errorf("unknown field name %q in %v", name, st)
David Symonds54531052011-12-08 12:00:31 +1100513 }
514
David Symonds20370902013-03-23 17:20:01 +1100515 dst := sv.Field(fi)
David Symonds20370902013-03-23 17:20:01 +1100516
David Symonds54531052011-12-08 12:00:31 +1100517 // Check that it's not already set if it's not a repeated field.
David Symonds8a099d02014-10-30 12:40:51 +1100518 if !props.Repeated && fieldSet[name] {
519 return p.errorf("non-repeated field %q was repeated", name)
David Symonds54531052011-12-08 12:00:31 +1100520 }
521
522 if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
523 return err
524 }
525
526 // Parse into the field.
David Symonds8a099d02014-10-30 12:40:51 +1100527 fieldSet[name] = true
David Symonds007ed9d2012-07-24 10:59:36 +1000528 if err := p.readAny(dst, props); err != nil {
David Symonds2a1c6b92014-10-12 16:42:41 +1100529 if _, ok := err.(*RequiredNotSetError); !ok {
530 return err
531 }
532 reqFieldErr = err
533 } else if props.Required {
David Symonds54531052011-12-08 12:00:31 +1100534 reqCount--
535 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700536 }
David Symondsbe02a4a2012-12-06 15:20:41 +1100537
538 // For backward compatibility, permit a semicolon or comma after a field.
539 tok = p.next()
540 if tok.err != nil {
541 return tok.err
542 }
543 if tok.value != ";" && tok.value != "," {
544 p.back()
545 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700546 }
547
548 if reqCount > 0 {
549 return p.missingRequiredFieldError(sv)
550 }
David Symonds2a1c6b92014-10-12 16:42:41 +1100551 return reqFieldErr
Rob Pikeaaa3a622010-03-20 22:32:34 -0700552}
553
David Symonds2a1c6b92014-10-12 16:42:41 +1100554func (p *textParser) readAny(v reflect.Value, props *Properties) error {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700555 tok := p.next()
556 if tok.err != nil {
557 return tok.err
558 }
559 if tok.value == "" {
Rob Piked6420b82011-04-13 16:37:04 -0700560 return p.errorf("unexpected EOF")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700561 }
562
Rob Pike97e934d2011-04-11 12:52:49 -0700563 switch fv := v; fv.Kind() {
564 case reflect.Slice:
565 at := v.Type()
Rob Pikeab5b8022010-06-21 17:47:58 -0700566 if at.Elem().Kind() == reflect.Uint8 {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700567 // Special case for []byte
David Symonds162d0032012-06-28 09:44:46 -0700568 if tok.value[0] != '"' && tok.value[0] != '\'' {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700569 // Deliberately written out here, as the error after
570 // this switch statement would write "invalid []byte: ...",
571 // which is not as user-friendly.
Rob Piked6420b82011-04-13 16:37:04 -0700572 return p.errorf("invalid string: %v", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700573 }
574 bytes := []byte(tok.unquoted)
Nigel Tao4ede8452011-04-28 11:27:25 +1000575 fv.Set(reflect.ValueOf(bytes))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700576 return nil
577 }
578 // Repeated field. May already exist.
David Symonds79eae332010-10-16 11:33:20 +1100579 flen := fv.Len()
580 if flen == fv.Cap() {
581 nav := reflect.MakeSlice(at, flen, 2*flen+1)
Rob Pike48fd4a42010-12-14 23:40:41 -0800582 reflect.Copy(nav, fv)
David Symonds79eae332010-10-16 11:33:20 +1100583 fv.Set(nav)
584 }
585 fv.SetLen(flen + 1)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700586
587 // Read one.
588 p.back()
David Symondsef8f0e82011-10-13 12:57:34 +1100589 return p.readAny(fv.Index(flen), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700590 case reflect.Bool:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700591 // Either "true", "false", 1 or 0.
592 switch tok.value {
593 case "true", "1":
Rob Pike97e934d2011-04-11 12:52:49 -0700594 fv.SetBool(true)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700595 return nil
596 case "false", "0":
Rob Pike97e934d2011-04-11 12:52:49 -0700597 fv.SetBool(false)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700598 return nil
599 }
Rob Pike97e934d2011-04-11 12:52:49 -0700600 case reflect.Float32, reflect.Float64:
David Symonds6bd081e2012-06-28 10:46:25 -0700601 v := tok.value
David Symondsbe02a4a2012-12-06 15:20:41 +1100602 // Ignore 'f' for compatibility with output generated by C++, but don't
603 // remove 'f' when the value is "-inf" or "inf".
604 if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
David Symonds6bd081e2012-06-28 10:46:25 -0700605 v = v[:len(v)-1]
606 }
607 if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
Rob Pike97e934d2011-04-11 12:52:49 -0700608 fv.SetFloat(f)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700609 return nil
610 }
Rob Pike19b2dbb2011-04-11 16:49:15 -0700611 case reflect.Int32:
David Symonds32612dd2012-06-15 07:59:05 -0700612 if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700613 fv.SetInt(x)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700614 return nil
Rob Pike19b2dbb2011-04-11 16:49:15 -0700615 }
David Symonds8bb628d2014-07-22 13:49:35 +1000616
Rob Pike19b2dbb2011-04-11 16:49:15 -0700617 if len(props.Enum) == 0 {
618 break
619 }
620 m, ok := enumValueMaps[props.Enum]
621 if !ok {
622 break
623 }
624 x, ok := m[tok.value]
625 if !ok {
626 break
627 }
628 fv.SetInt(int64(x))
629 return nil
630 case reflect.Int64:
David Symonds32612dd2012-06-15 07:59:05 -0700631 if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700632 fv.SetInt(x)
633 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700634 }
David Symonds8bb628d2014-07-22 13:49:35 +1000635
Rob Pike97e934d2011-04-11 12:52:49 -0700636 case reflect.Ptr:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700637 // A basic field (indirected through pointer), or a repeated message/group
638 p.back()
Rob Pikeccd260c2011-04-18 13:13:04 -0700639 fv.Set(reflect.New(fv.Type().Elem()))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700640 return p.readAny(fv.Elem(), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700641 case reflect.String:
David Symonds162d0032012-06-28 09:44:46 -0700642 if tok.value[0] == '"' || tok.value[0] == '\'' {
Rob Pike97e934d2011-04-11 12:52:49 -0700643 fv.SetString(tok.unquoted)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700644 return nil
645 }
Rob Pike97e934d2011-04-11 12:52:49 -0700646 case reflect.Struct:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700647 var terminator string
648 switch tok.value {
649 case "{":
650 terminator = "}"
651 case "<":
652 terminator = ">"
653 default:
Rob Piked6420b82011-04-13 16:37:04 -0700654 return p.errorf("expected '{' or '<', found %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700655 }
David Symonds81177532014-11-20 14:33:40 +1100656 // TODO: Handle nested messages which implement encoding.TextUnmarshaler.
Rob Pikeaaa3a622010-03-20 22:32:34 -0700657 return p.readStruct(fv, terminator)
Rob Pike19b2dbb2011-04-11 16:49:15 -0700658 case reflect.Uint32:
David Symonds32612dd2012-06-15 07:59:05 -0700659 if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700660 fv.SetUint(uint64(x))
661 return nil
662 }
663 case reflect.Uint64:
David Symonds32612dd2012-06-15 07:59:05 -0700664 if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700665 fv.SetUint(x)
666 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700667 }
668 }
Rob Piked6420b82011-04-13 16:37:04 -0700669 return p.errorf("invalid %v: %v", v.Type(), tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700670}
671
David Symonds501f7db2013-08-05 13:53:28 +1000672// UnmarshalText reads a protocol buffer in Text format. UnmarshalText resets pb
673// before starting to unmarshal, so any existing data in pb is always removed.
David Symonds2a1c6b92014-10-12 16:42:41 +1100674// If a required field is not set and no other error occurs,
675// UnmarshalText returns *RequiredNotSetError.
David Symonds9f60f432012-06-14 09:45:25 +1000676func UnmarshalText(s string, pb Message) error {
David Symonds81177532014-11-20 14:33:40 +1100677 if um, ok := pb.(encoding.TextUnmarshaler); ok {
David Symonds267e8052014-02-19 14:50:51 +1100678 err := um.UnmarshalText([]byte(s))
679 return err
680 }
David Symonds501f7db2013-08-05 13:53:28 +1000681 pb.Reset()
Nigel Tao4ede8452011-04-28 11:27:25 +1000682 v := reflect.ValueOf(pb)
David Symondsa9cda212011-04-15 01:23:17 -0700683 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700684 return pe
685 }
686 return nil
687}