blob: 0e76e13a70bb29b3d306d6833d5892f39b30f40f [file] [log] [blame]
Rob Pikeaaa3a622010-03-20 22:32:34 -07001// Go support for Protocol Buffers - Google's data interchange format
2//
3// Copyright 2010 Google Inc. All rights reserved.
4// http://code.google.com/p/goprotobuf/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
34// Functions for parsing the Text protocol buffer format.
David Symonds54531052011-12-08 12:00:31 +110035// TODO: message sets.
Rob Pikeaaa3a622010-03-20 22:32:34 -070036
37import (
38 "fmt"
Rob Pikeaaa3a622010-03-20 22:32:34 -070039 "reflect"
40 "strconv"
41)
42
// ParseError describes a failure encountered while parsing text-format
// protocol buffer input, together with the position it was reported at.
type ParseError struct {
	Message string // description of what went wrong
	Line    int    // 1-based line number
	Offset  int    // 0-based byte offset from start of input
}

// Error implements the error interface. The byte offset is included only
// for errors on line 1; errors on any other line report just the line number.
func (p *ParseError) Error() string {
	if p.Line != 1 {
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
	// Offset counts from the start of the input, so it is only shown
	// for the first line.
	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
56
57type token struct {
58 value string
59 err *ParseError
60 line int // line number
61 offset int // byte number from start of input, not start of line
62 unquoted string // the unquoted version of value, if it was a quoted string
63}
64
65func (t *token) String() string {
66 if t.err == nil {
67 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
68 }
69 return fmt.Sprintf("parse error: %v", t.err)
70}
71
72type textParser struct {
73 s string // remaining input
74 done bool // whether the parsing is finished (success or error)
75 backed bool // whether back() was called
76 offset, line int
77 cur token
78}
79
80func newTextParser(s string) *textParser {
81 p := new(textParser)
82 p.s = s
83 p.line = 1
84 p.cur.line = 1
85 return p
86}
87
Rob Piked6420b82011-04-13 16:37:04 -070088func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070089 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070090 p.cur.err = pe
91 p.done = true
92 return pe
93}
94
// isIdentOrNumberChar reports whether c may appear in an identifier or a
// number, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
	case 'A' <= c && c <= 'Z':
	case '0' <= c && c <= '9':
	case c == '-' || c == '+' || c == '.' || c == '_':
	default:
		return false
	}
	return true
}
109
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
117
118func (p *textParser) skipWhitespace() {
119 i := 0
120 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
121 if p.s[i] == '#' {
122 // comment; skip to end of line or input
123 for i < len(p.s) && p.s[i] != '\n' {
124 i++
125 }
126 if i == len(p.s) {
127 break
128 }
129 }
130 if p.s[i] == '\n' {
131 p.line++
132 }
133 i++
134 }
135 p.offset += i
136 p.s = p.s[i:len(p.s)]
137 if len(p.s) == 0 {
138 p.done = true
139 }
140}
141
142func (p *textParser) advance() {
143 // Skip whitespace
144 p.skipWhitespace()
145 if p.done {
146 return
147 }
148
149 // Start of non-whitespace
150 p.cur.err = nil
151 p.cur.offset, p.cur.line = p.offset, p.line
152 p.cur.unquoted = ""
153 switch p.s[0] {
David Symonds54531052011-12-08 12:00:31 +1100154 case '<', '>', '{', '}', ':', '[', ']':
Rob Pikeaaa3a622010-03-20 22:32:34 -0700155 // Single symbol
156 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
157 case '"':
158 // Quoted string
159 i := 1
160 for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
161 if p.s[i] == '\\' && i+1 < len(p.s) {
162 // skip escaped char
163 i++
164 }
165 i++
166 }
167 if i >= len(p.s) || p.s[i] != '"' {
Rob Piked6420b82011-04-13 16:37:04 -0700168 p.errorf("unmatched quote")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700169 return
170 }
171 // TODO: Should be UnquoteC.
172 unq, err := strconv.Unquote(p.s[0 : i+1])
173 if err != nil {
Rob Piked6420b82011-04-13 16:37:04 -0700174 p.errorf("invalid quoted string %v", p.s[0:i+1])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700175 return
176 }
177 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
178 p.cur.unquoted = unq
179 default:
180 i := 0
181 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
182 i++
183 }
184 if i == 0 {
Rob Piked6420b82011-04-13 16:37:04 -0700185 p.errorf("unexpected byte %#x", p.s[0])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700186 return
187 }
188 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
189 }
190 p.offset += len(p.cur.value)
191}
192
193// Back off the parser by one token. Can only be done between calls to next().
194// It makes the next advance() a no-op.
195func (p *textParser) back() { p.backed = true }
196
197// Advances the parser and returns the new current token.
198func (p *textParser) next() *token {
199 if p.backed || p.done {
200 p.backed = false
201 return &p.cur
202 }
203 p.advance()
204 if p.done {
205 p.cur.value = ""
206 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
207 // Look for multiple quoted strings separated by whitespace,
208 // and concatenate them.
209 cat := p.cur
210 for {
211 p.skipWhitespace()
212 if p.done || p.s[0] != '"' {
213 break
214 }
215 p.advance()
216 if p.cur.err != nil {
217 return &p.cur
218 }
219 cat.value += " " + p.cur.value
220 cat.unquoted += p.cur.unquoted
221 }
222 p.done = false // parser may have seen EOF, but we want to return cat
223 p.cur = cat
224 }
225 return &p.cur
226}
227
Rob Pikeaaa3a622010-03-20 22:32:34 -0700228// Return an error indicating which required field was not set.
Rob Pike97e934d2011-04-11 12:52:49 -0700229func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
230 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700231 sprops := GetProperties(st)
232 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700233 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700234 continue
235 }
236
237 props := sprops.Prop[i]
238 if props.Required {
Rob Piked6420b82011-04-13 16:37:04 -0700239 return p.errorf("message %v missing required field %q", st, props.OrigName)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700240 }
241 }
Rob Piked6420b82011-04-13 16:37:04 -0700242 return p.errorf("message %v missing required field", st) // should not happen
Rob Pikeaaa3a622010-03-20 22:32:34 -0700243}
244
245// Returns the index in the struct for the named field, as well as the parsed tag properties.
Rob Pike97e934d2011-04-11 12:52:49 -0700246func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700247 sprops := GetProperties(st)
David Symonds79eae332010-10-16 11:33:20 +1100248 i, ok := sprops.origNames[name]
249 if ok {
250 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700251 }
252 return -1, nil, false
253}
254
David Symonds54531052011-12-08 12:00:31 +1100255// Consume a ':' from the input stream (if the next token is a colon),
256// returning an error if a colon is needed but not present.
257func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
258 tok := p.next()
259 if tok.err != nil {
260 return tok.err
261 }
262 if tok.value != ":" {
263 // Colon is optional when the field is a group or message.
264 needColon := true
265 switch props.Wire {
266 case "group":
267 needColon = false
268 case "bytes":
269 // A "bytes" field is either a message, a string, or a repeated field;
270 // those three become *T, *string and []T respectively, so we can check for
271 // this field being a pointer to a non-string.
272 if typ.Kind() == reflect.Ptr {
273 // *T or *string
274 if typ.Elem().Kind() == reflect.String {
275 break
276 }
277 } else if typ.Kind() == reflect.Slice {
278 // []T or []*T
279 if typ.Elem().Kind() != reflect.Ptr {
280 break
281 }
282 }
283 needColon = false
284 }
285 if needColon {
286 return p.errorf("expected ':', found %q", tok.value)
287 }
288 p.back()
289 }
290 return nil
291}
292
Rob Pike97e934d2011-04-11 12:52:49 -0700293func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
294 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700295 reqCount := GetProperties(st).reqCount
296 // A struct is a sequence of "name: value", terminated by one of
David Symonds54531052011-12-08 12:00:31 +1100297 // '>' or '}', or the end of the input. A name may also be
298 // "[extension]".
Rob Pikeaaa3a622010-03-20 22:32:34 -0700299 for {
300 tok := p.next()
301 if tok.err != nil {
302 return tok.err
303 }
304 if tok.value == terminator {
305 break
306 }
David Symonds54531052011-12-08 12:00:31 +1100307 if tok.value == "[" {
308 // Looks like an extension.
309 //
310 // TODO: Check whether we need to handle
311 // namespace rooted names (e.g. ".something.Foo").
312 tok = p.next()
313 if tok.err != nil {
314 return tok.err
315 }
316 var desc *ExtensionDesc
317 // This could be faster, but it's functional.
318 // TODO: Do something smarter than a linear scan.
319 for _, d := range RegisteredExtensions(reflect.New(st).Interface()) {
320 if d.Name == tok.value {
321 desc = d
322 break
Rob Pikeaaa3a622010-03-20 22:32:34 -0700323 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700324 }
David Symonds54531052011-12-08 12:00:31 +1100325 if desc == nil {
326 return p.errorf("unrecognized extension %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700327 }
David Symonds54531052011-12-08 12:00:31 +1100328 // Check the extension terminator.
329 tok = p.next()
330 if tok.err != nil {
331 return tok.err
332 }
333 if tok.value != "]" {
334 return p.errorf("unrecognized extension terminator %q", tok.value)
335 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700336
David Symonds54531052011-12-08 12:00:31 +1100337 props := &Properties{}
338 props.Parse(desc.Tag)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700339
David Symonds54531052011-12-08 12:00:31 +1100340 typ := reflect.TypeOf(desc.ExtensionType)
341 if err := p.checkForColon(props, typ); err != nil {
342 return err
343 }
344
345 // Read the extension structure, and set it in
346 // the value we're constructing.
347 ext := reflect.New(typ).Elem()
348 if err := p.readAny(ext, props); err != nil {
349 return err
350 }
351 SetExtension(sv.Addr().Interface().(extendableProto),
352 desc, ext.Interface())
353 } else {
354 // This is a normal, non-extension field.
355 fi, props, ok := structFieldByName(st, tok.value)
356 if !ok {
357 return p.errorf("unknown field name %q in %v", tok.value, st)
358 }
359
360 // Check that it's not already set if it's not a repeated field.
361 if !props.Repeated && !isNil(sv.Field(fi)) {
362 return p.errorf("non-repeated field %q was repeated", tok.value)
363 }
364
365 if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
366 return err
367 }
368
369 // Parse into the field.
370 if err := p.readAny(sv.Field(fi), props); err != nil {
371 return err
372 }
373
374 if props.Required {
375 reqCount--
376 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700377 }
378 }
379
380 if reqCount > 0 {
381 return p.missingRequiredFieldError(sv)
382 }
383 return nil
384}
385
// Bounds used when range-checking parsed 32-bit integer values.
const (
	minInt32  = -0x80000000 // -1 << 31
	maxInt32  = 0x7fffffff  // 1<<31 - 1
	maxUint32 = 0xffffffff  // 1<<32 - 1
)
391
392func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
393 tok := p.next()
394 if tok.err != nil {
395 return tok.err
396 }
397 if tok.value == "" {
Rob Piked6420b82011-04-13 16:37:04 -0700398 return p.errorf("unexpected EOF")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700399 }
400
Rob Pike97e934d2011-04-11 12:52:49 -0700401 switch fv := v; fv.Kind() {
402 case reflect.Slice:
403 at := v.Type()
Rob Pikeab5b8022010-06-21 17:47:58 -0700404 if at.Elem().Kind() == reflect.Uint8 {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700405 // Special case for []byte
406 if tok.value[0] != '"' {
407 // Deliberately written out here, as the error after
408 // this switch statement would write "invalid []byte: ...",
409 // which is not as user-friendly.
Rob Piked6420b82011-04-13 16:37:04 -0700410 return p.errorf("invalid string: %v", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700411 }
412 bytes := []byte(tok.unquoted)
Nigel Tao4ede8452011-04-28 11:27:25 +1000413 fv.Set(reflect.ValueOf(bytes))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700414 return nil
415 }
416 // Repeated field. May already exist.
David Symonds79eae332010-10-16 11:33:20 +1100417 flen := fv.Len()
418 if flen == fv.Cap() {
419 nav := reflect.MakeSlice(at, flen, 2*flen+1)
Rob Pike48fd4a42010-12-14 23:40:41 -0800420 reflect.Copy(nav, fv)
David Symonds79eae332010-10-16 11:33:20 +1100421 fv.Set(nav)
422 }
423 fv.SetLen(flen + 1)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700424
425 // Read one.
426 p.back()
David Symondsef8f0e82011-10-13 12:57:34 +1100427 return p.readAny(fv.Index(flen), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700428 case reflect.Bool:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700429 // Either "true", "false", 1 or 0.
430 switch tok.value {
431 case "true", "1":
Rob Pike97e934d2011-04-11 12:52:49 -0700432 fv.SetBool(true)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700433 return nil
434 case "false", "0":
Rob Pike97e934d2011-04-11 12:52:49 -0700435 fv.SetBool(false)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700436 return nil
437 }
Rob Pike97e934d2011-04-11 12:52:49 -0700438 case reflect.Float32, reflect.Float64:
Rob Pikeab5b8022010-06-21 17:47:58 -0700439 if f, err := strconv.AtofN(tok.value, fv.Type().Bits()); err == nil {
Rob Pike97e934d2011-04-11 12:52:49 -0700440 fv.SetFloat(f)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700441 return nil
442 }
Rob Pike19b2dbb2011-04-11 16:49:15 -0700443 case reflect.Int32:
444 if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
445 fv.SetInt(x)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700446 return nil
Rob Pike19b2dbb2011-04-11 16:49:15 -0700447 }
448 if len(props.Enum) == 0 {
449 break
450 }
451 m, ok := enumValueMaps[props.Enum]
452 if !ok {
453 break
454 }
455 x, ok := m[tok.value]
456 if !ok {
457 break
458 }
459 fv.SetInt(int64(x))
460 return nil
461 case reflect.Int64:
462 if x, err := strconv.Atoi64(tok.value); err == nil {
463 fv.SetInt(x)
464 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700465 }
Rob Pike97e934d2011-04-11 12:52:49 -0700466 case reflect.Ptr:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700467 // A basic field (indirected through pointer), or a repeated message/group
468 p.back()
Rob Pikeccd260c2011-04-18 13:13:04 -0700469 fv.Set(reflect.New(fv.Type().Elem()))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700470 return p.readAny(fv.Elem(), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700471 case reflect.String:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700472 if tok.value[0] == '"' {
Rob Pike97e934d2011-04-11 12:52:49 -0700473 fv.SetString(tok.unquoted)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700474 return nil
475 }
Rob Pike97e934d2011-04-11 12:52:49 -0700476 case reflect.Struct:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700477 var terminator string
478 switch tok.value {
479 case "{":
480 terminator = "}"
481 case "<":
482 terminator = ">"
483 default:
Rob Piked6420b82011-04-13 16:37:04 -0700484 return p.errorf("expected '{' or '<', found %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700485 }
486 return p.readStruct(fv, terminator)
Rob Pike19b2dbb2011-04-11 16:49:15 -0700487 case reflect.Uint32:
488 if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
489 fv.SetUint(uint64(x))
490 return nil
491 }
492 case reflect.Uint64:
493 if x, err := strconv.Atoui64(tok.value); err == nil {
494 fv.SetUint(x)
495 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700496 }
497 }
Rob Piked6420b82011-04-13 16:37:04 -0700498 return p.errorf("invalid %v: %v", v.Type(), tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700499}
500
Rob Pikea17fdd92011-11-02 12:43:05 -0700501var notPtrStruct error = &ParseError{"destination is not a pointer to a struct", 0, 0}
Rob Pikeaaa3a622010-03-20 22:32:34 -0700502
503// UnmarshalText reads a protobuffer in Text format.
Rob Pikea17fdd92011-11-02 12:43:05 -0700504func UnmarshalText(s string, pb interface{}) error {
Nigel Tao4ede8452011-04-28 11:27:25 +1000505 v := reflect.ValueOf(pb)
David Symondsa9cda212011-04-15 01:23:17 -0700506 if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700507 return notPtrStruct
508 }
David Symondsa9cda212011-04-15 01:23:17 -0700509 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700510 return pe
511 }
512 return nil
513}