blob: 2124308f228b373ba630b70d0c42d638ed88b2d9 [file] [log] [blame]
Rob Pikeaaa3a622010-03-20 22:32:34 -07001// Go support for Protocol Buffers - Google's data interchange format
2//
3// Copyright 2010 Google Inc. All rights reserved.
4// http://code.google.com/p/goprotobuf/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
34// Functions for parsing the Text protocol buffer format.
David Symonds54531052011-12-08 12:00:31 +110035// TODO: message sets.
Rob Pikeaaa3a622010-03-20 22:32:34 -070036
37import (
38 "fmt"
Rob Pikeaaa3a622010-03-20 22:32:34 -070039 "reflect"
40 "strconv"
David Symonds183124e2012-03-23 13:20:23 +110041 "strings"
Rob Pikeaaa3a622010-03-20 22:32:34 -070042)
43
// ParseError describes a failure encountered while parsing text-format
// input, together with the position at which it was detected.
type ParseError struct {
	Message string
	Line    int // 1-based line number
	Offset  int // 0-based byte offset from start of input
}

// Error implements the error interface. The byte offset is included only
// when the error is on the first line ("line 1.<offset>: <message>"); for
// any other line just the line number is reported.
func (p *ParseError) Error() string {
	if p.Line != 1 {
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
	// First line: the offset is shown as well.
	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
57
58type token struct {
59 value string
60 err *ParseError
61 line int // line number
62 offset int // byte number from start of input, not start of line
63 unquoted string // the unquoted version of value, if it was a quoted string
64}
65
66func (t *token) String() string {
67 if t.err == nil {
68 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
69 }
70 return fmt.Sprintf("parse error: %v", t.err)
71}
72
73type textParser struct {
74 s string // remaining input
75 done bool // whether the parsing is finished (success or error)
76 backed bool // whether back() was called
77 offset, line int
78 cur token
79}
80
81func newTextParser(s string) *textParser {
82 p := new(textParser)
83 p.s = s
84 p.line = 1
85 p.cur.line = 1
86 return p
87}
88
Rob Piked6420b82011-04-13 16:37:04 -070089func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070090 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070091 p.cur.err = pe
92 p.done = true
93 return pe
94}
95
// isIdentOrNumberChar reports whether c may appear in an identifier or a
// number, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z':
	case c >= 'A' && c <= 'Z':
	case c >= '0' && c <= '9':
	case c == '-' || c == '+' || c == '.' || c == '_':
	default:
		return false
	}
	return true
}
110
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
118
119func (p *textParser) skipWhitespace() {
120 i := 0
121 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
122 if p.s[i] == '#' {
123 // comment; skip to end of line or input
124 for i < len(p.s) && p.s[i] != '\n' {
125 i++
126 }
127 if i == len(p.s) {
128 break
129 }
130 }
131 if p.s[i] == '\n' {
132 p.line++
133 }
134 i++
135 }
136 p.offset += i
137 p.s = p.s[i:len(p.s)]
138 if len(p.s) == 0 {
139 p.done = true
140 }
141}
142
143func (p *textParser) advance() {
144 // Skip whitespace
145 p.skipWhitespace()
146 if p.done {
147 return
148 }
149
150 // Start of non-whitespace
151 p.cur.err = nil
152 p.cur.offset, p.cur.line = p.offset, p.line
153 p.cur.unquoted = ""
154 switch p.s[0] {
David Symonds54531052011-12-08 12:00:31 +1100155 case '<', '>', '{', '}', ':', '[', ']':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700156 // Single symbol
157 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
158 case '"':
159 // Quoted string
160 i := 1
161 for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
162 if p.s[i] == '\\' && i+1 < len(p.s) {
163 // skip escaped char
164 i++
165 }
166 i++
167 }
168 if i >= len(p.s) || p.s[i] != '"' {
Rob Piked6420b82011-04-13 16:37:04 -0700169 p.errorf("unmatched quote")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700170 return
171 }
David Symonds183124e2012-03-23 13:20:23 +1100172 unq, err := unquoteC(p.s[0 : i+1])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700173 if err != nil {
Rob Piked6420b82011-04-13 16:37:04 -0700174 p.errorf("invalid quoted string %v", p.s[0:i+1])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700175 return
176 }
177 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
178 p.cur.unquoted = unq
179 default:
180 i := 0
181 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
182 i++
183 }
184 if i == 0 {
Rob Piked6420b82011-04-13 16:37:04 -0700185 p.errorf("unexpected byte %#x", p.s[0])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700186 return
187 }
188 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
189 }
190 p.offset += len(p.cur.value)
191}
192
// unquoteC interprets s, a double-quoted string literal including its
// quotes, and returns the unquoted contents.
//
// It defers to strconv.Unquote with one divergence between Go string
// literals and text-format protocol buffers: Go considers \' invalid inside
// a double-quoted literal, while the text format considers it valid. Each \'
// escape is therefore rewritten to a bare ' before unquoting.
//
// Escapes are scanned pairwise, so the second backslash of an escaped
// backslash is never mistaken for the start of a \' escape: `"\\'"` yields a
// backslash followed by an apostrophe instead of being rejected.
func unquoteC(s string) (string, error) {
	if !strings.Contains(s, `\'`) {
		// Nothing to rewrite.
		return strconv.Unquote(s)
	}
	buf := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' || i+1 == len(s) {
			buf = append(buf, c)
			continue
		}
		// c introduces an escape; consume it together with the byte that
		// follows so that byte cannot start another escape.
		i++
		if s[i] != '\'' {
			buf = append(buf, '\\')
		}
		buf = append(buf, s[i])
	}
	return strconv.Unquote(string(buf))
}
200
Rob Pikeaaa3a622010-03-20 22:32:34 -0700201// Back off the parser by one token. Can only be done between calls to next().
202// It makes the next advance() a no-op.
203func (p *textParser) back() { p.backed = true }
204
205// Advances the parser and returns the new current token.
206func (p *textParser) next() *token {
207 if p.backed || p.done {
208 p.backed = false
209 return &p.cur
210 }
211 p.advance()
212 if p.done {
213 p.cur.value = ""
214 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
215 // Look for multiple quoted strings separated by whitespace,
216 // and concatenate them.
217 cat := p.cur
218 for {
219 p.skipWhitespace()
220 if p.done || p.s[0] != '"' {
221 break
222 }
223 p.advance()
224 if p.cur.err != nil {
225 return &p.cur
226 }
227 cat.value += " " + p.cur.value
228 cat.unquoted += p.cur.unquoted
229 }
230 p.done = false // parser may have seen EOF, but we want to return cat
231 p.cur = cat
232 }
233 return &p.cur
234}
235
Rob Pikeaaa3a622010-03-20 22:32:34 -0700236// Return an error indicating which required field was not set.
Rob Pike97e934d2011-04-11 12:52:49 -0700237func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
238 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700239 sprops := GetProperties(st)
240 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700241 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700242 continue
243 }
244
245 props := sprops.Prop[i]
246 if props.Required {
Rob Piked6420b82011-04-13 16:37:04 -0700247 return p.errorf("message %v missing required field %q", st, props.OrigName)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700248 }
249 }
Rob Piked6420b82011-04-13 16:37:04 -0700250 return p.errorf("message %v missing required field", st) // should not happen
Rob Pikeaaa3a622010-03-20 22:32:34 -0700251}
252
253// Returns the index in the struct for the named field, as well as the parsed tag properties.
Rob Pike97e934d2011-04-11 12:52:49 -0700254func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700255 sprops := GetProperties(st)
David Symonds79eae332010-10-16 11:33:20 +1100256 i, ok := sprops.origNames[name]
257 if ok {
258 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700259 }
260 return -1, nil, false
261}
262
David Symonds54531052011-12-08 12:00:31 +1100263// Consume a ':' from the input stream (if the next token is a colon),
264// returning an error if a colon is needed but not present.
265func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
266 tok := p.next()
267 if tok.err != nil {
268 return tok.err
269 }
270 if tok.value != ":" {
271 // Colon is optional when the field is a group or message.
272 needColon := true
273 switch props.Wire {
274 case "group":
275 needColon = false
276 case "bytes":
277 // A "bytes" field is either a message, a string, or a repeated field;
278 // those three become *T, *string and []T respectively, so we can check for
279 // this field being a pointer to a non-string.
280 if typ.Kind() == reflect.Ptr {
281 // *T or *string
282 if typ.Elem().Kind() == reflect.String {
283 break
284 }
285 } else if typ.Kind() == reflect.Slice {
286 // []T or []*T
287 if typ.Elem().Kind() != reflect.Ptr {
288 break
289 }
290 }
291 needColon = false
292 }
293 if needColon {
294 return p.errorf("expected ':', found %q", tok.value)
295 }
296 p.back()
297 }
298 return nil
299}
300
Rob Pike97e934d2011-04-11 12:52:49 -0700301func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
302 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700303 reqCount := GetProperties(st).reqCount
304 // A struct is a sequence of "name: value", terminated by one of
David Symonds54531052011-12-08 12:00:31 +1100305 // '>' or '}', or the end of the input. A name may also be
306 // "[extension]".
Rob Pikeaaa3a622010-03-20 22:32:34 -0700307 for {
308 tok := p.next()
309 if tok.err != nil {
310 return tok.err
311 }
312 if tok.value == terminator {
313 break
314 }
David Symonds54531052011-12-08 12:00:31 +1100315 if tok.value == "[" {
316 // Looks like an extension.
317 //
318 // TODO: Check whether we need to handle
319 // namespace rooted names (e.g. ".something.Foo").
320 tok = p.next()
321 if tok.err != nil {
322 return tok.err
323 }
324 var desc *ExtensionDesc
325 // This could be faster, but it's functional.
326 // TODO: Do something smarter than a linear scan.
327 for _, d := range RegisteredExtensions(reflect.New(st).Interface()) {
328 if d.Name == tok.value {
329 desc = d
330 break
Rob Pikeaaa3a622010-03-20 22:32:34 -0700331 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700332 }
David Symonds54531052011-12-08 12:00:31 +1100333 if desc == nil {
334 return p.errorf("unrecognized extension %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700335 }
David Symonds54531052011-12-08 12:00:31 +1100336 // Check the extension terminator.
337 tok = p.next()
338 if tok.err != nil {
339 return tok.err
340 }
341 if tok.value != "]" {
342 return p.errorf("unrecognized extension terminator %q", tok.value)
343 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700344
David Symonds54531052011-12-08 12:00:31 +1100345 props := &Properties{}
346 props.Parse(desc.Tag)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700347
David Symonds54531052011-12-08 12:00:31 +1100348 typ := reflect.TypeOf(desc.ExtensionType)
349 if err := p.checkForColon(props, typ); err != nil {
350 return err
351 }
352
353 // Read the extension structure, and set it in
354 // the value we're constructing.
355 ext := reflect.New(typ).Elem()
356 if err := p.readAny(ext, props); err != nil {
357 return err
358 }
359 SetExtension(sv.Addr().Interface().(extendableProto),
360 desc, ext.Interface())
361 } else {
362 // This is a normal, non-extension field.
363 fi, props, ok := structFieldByName(st, tok.value)
364 if !ok {
365 return p.errorf("unknown field name %q in %v", tok.value, st)
366 }
367
368 // Check that it's not already set if it's not a repeated field.
369 if !props.Repeated && !isNil(sv.Field(fi)) {
370 return p.errorf("non-repeated field %q was repeated", tok.value)
371 }
372
373 if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
374 return err
375 }
376
377 // Parse into the field.
378 if err := p.readAny(sv.Field(fi), props); err != nil {
379 return err
380 }
381
382 if props.Required {
383 reqCount--
384 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700385 }
386 }
387
388 if reqCount > 0 {
389 return p.missingRequiredFieldError(sv)
390 }
391 return nil
392}
393
Rob Pikeaaa3a622010-03-20 22:32:34 -0700394func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
395 tok := p.next()
396 if tok.err != nil {
397 return tok.err
398 }
399 if tok.value == "" {
Rob Piked6420b82011-04-13 16:37:04 -0700400 return p.errorf("unexpected EOF")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700401 }
402
Rob Pike97e934d2011-04-11 12:52:49 -0700403 switch fv := v; fv.Kind() {
404 case reflect.Slice:
405 at := v.Type()
Rob Pikeab5b8022010-06-21 17:47:58 -0700406 if at.Elem().Kind() == reflect.Uint8 {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700407 // Special case for []byte
408 if tok.value[0] != '"' {
409 // Deliberately written out here, as the error after
410 // this switch statement would write "invalid []byte: ...",
411 // which is not as user-friendly.
Rob Piked6420b82011-04-13 16:37:04 -0700412 return p.errorf("invalid string: %v", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700413 }
414 bytes := []byte(tok.unquoted)
Nigel Tao4ede8452011-04-28 11:27:25 +1000415 fv.Set(reflect.ValueOf(bytes))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700416 return nil
417 }
418 // Repeated field. May already exist.
David Symonds79eae332010-10-16 11:33:20 +1100419 flen := fv.Len()
420 if flen == fv.Cap() {
421 nav := reflect.MakeSlice(at, flen, 2*flen+1)
Rob Pike48fd4a42010-12-14 23:40:41 -0800422 reflect.Copy(nav, fv)
David Symonds79eae332010-10-16 11:33:20 +1100423 fv.Set(nav)
424 }
425 fv.SetLen(flen + 1)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700426
427 // Read one.
428 p.back()
David Symondsef8f0e82011-10-13 12:57:34 +1100429 return p.readAny(fv.Index(flen), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700430 case reflect.Bool:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700431 // Either "true", "false", 1 or 0.
432 switch tok.value {
433 case "true", "1":
Rob Pike97e934d2011-04-11 12:52:49 -0700434 fv.SetBool(true)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700435 return nil
436 case "false", "0":
Rob Pike97e934d2011-04-11 12:52:49 -0700437 fv.SetBool(false)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700438 return nil
439 }
Rob Pike97e934d2011-04-11 12:52:49 -0700440 case reflect.Float32, reflect.Float64:
David Symonds93be46f2011-12-08 12:58:23 +1100441 if f, err := strconv.ParseFloat(tok.value, fv.Type().Bits()); err == nil {
Rob Pike97e934d2011-04-11 12:52:49 -0700442 fv.SetFloat(f)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700443 return nil
444 }
Rob Pike19b2dbb2011-04-11 16:49:15 -0700445 case reflect.Int32:
David Symonds93be46f2011-12-08 12:58:23 +1100446 if x, err := strconv.ParseInt(tok.value, 10, 32); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700447 fv.SetInt(x)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700448 return nil
Rob Pike19b2dbb2011-04-11 16:49:15 -0700449 }
450 if len(props.Enum) == 0 {
451 break
452 }
453 m, ok := enumValueMaps[props.Enum]
454 if !ok {
455 break
456 }
457 x, ok := m[tok.value]
458 if !ok {
459 break
460 }
461 fv.SetInt(int64(x))
462 return nil
463 case reflect.Int64:
David Symonds93be46f2011-12-08 12:58:23 +1100464 if x, err := strconv.ParseInt(tok.value, 10, 64); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700465 fv.SetInt(x)
466 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700467 }
Rob Pike97e934d2011-04-11 12:52:49 -0700468 case reflect.Ptr:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700469 // A basic field (indirected through pointer), or a repeated message/group
470 p.back()
Rob Pikeccd260c2011-04-18 13:13:04 -0700471 fv.Set(reflect.New(fv.Type().Elem()))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700472 return p.readAny(fv.Elem(), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700473 case reflect.String:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700474 if tok.value[0] == '"' {
Rob Pike97e934d2011-04-11 12:52:49 -0700475 fv.SetString(tok.unquoted)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700476 return nil
477 }
Rob Pike97e934d2011-04-11 12:52:49 -0700478 case reflect.Struct:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700479 var terminator string
480 switch tok.value {
481 case "{":
482 terminator = "}"
483 case "<":
484 terminator = ">"
485 default:
Rob Piked6420b82011-04-13 16:37:04 -0700486 return p.errorf("expected '{' or '<', found %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700487 }
488 return p.readStruct(fv, terminator)
Rob Pike19b2dbb2011-04-11 16:49:15 -0700489 case reflect.Uint32:
David Symonds93be46f2011-12-08 12:58:23 +1100490 if x, err := strconv.ParseUint(tok.value, 10, 32); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700491 fv.SetUint(uint64(x))
492 return nil
493 }
494 case reflect.Uint64:
David Symonds93be46f2011-12-08 12:58:23 +1100495 if x, err := strconv.ParseUint(tok.value, 10, 64); err == nil {
Rob Pike19b2dbb2011-04-11 16:49:15 -0700496 fv.SetUint(x)
497 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700498 }
499 }
Rob Piked6420b82011-04-13 16:37:04 -0700500 return p.errorf("invalid %v: %v", v.Type(), tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700501}
502
Rob Pikea17fdd92011-11-02 12:43:05 -0700503var notPtrStruct error = &ParseError{"destination is not a pointer to a struct", 0, 0}
Rob Pikeaaa3a622010-03-20 22:32:34 -0700504
505// UnmarshalText reads a protobuffer in Text format.
Rob Pikea17fdd92011-11-02 12:43:05 -0700506func UnmarshalText(s string, pb interface{}) error {
Nigel Tao4ede8452011-04-28 11:27:25 +1000507 v := reflect.ValueOf(pb)
David Symondsa9cda212011-04-15 01:23:17 -0700508 if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700509 return notPtrStruct
510 }
David Symondsa9cda212011-04-15 01:23:17 -0700511 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700512 return pe
513 }
514 return nil
515}