blob: 714c803a659d96a837834beef3ae999d30462644 [file] [log] [blame]
Rob Pikeaaa3a622010-03-20 22:32:34 -07001// Go support for Protocol Buffers - Google's data interchange format
2//
3// Copyright 2010 Google Inc. All rights reserved.
4// http://code.google.com/p/goprotobuf/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
34// Functions for parsing the Text protocol buffer format.
David Symonds9f402812011-04-28 18:08:44 +100035// TODO: message sets, extensions.
Rob Pikeaaa3a622010-03-20 22:32:34 -070036
37import (
38 "fmt"
39 "os"
40 "reflect"
41 "strconv"
42)
43
// ParseError describes a failure to parse text-format input.
// Its String method is what makes it usable as an os.Error.
type ParseError struct {
	Message string // human-readable description of the problem
	Line    int    // 1-based line number
	Offset  int    // 0-based byte offset from start of input
}

// String renders the error as "line N: message". For errors on the first
// line the byte offset is appended to the line number ("line 1.OFFSET: ...").
func (p *ParseError) String() string {
	if p.Line != 1 {
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
	// Only the first line reports the offset as well.
	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
58
59type token struct {
60 value string
61 err *ParseError
62 line int // line number
63 offset int // byte number from start of input, not start of line
64 unquoted string // the unquoted version of value, if it was a quoted string
65}
66
67func (t *token) String() string {
68 if t.err == nil {
69 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
70 }
71 return fmt.Sprintf("parse error: %v", t.err)
72}
73
74type textParser struct {
75 s string // remaining input
76 done bool // whether the parsing is finished (success or error)
77 backed bool // whether back() was called
78 offset, line int
79 cur token
80}
81
82func newTextParser(s string) *textParser {
83 p := new(textParser)
84 p.s = s
85 p.line = 1
86 p.cur.line = 1
87 return p
88}
89
Rob Piked6420b82011-04-13 16:37:04 -070090func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070091 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070092 p.cur.err = pe
93 p.done = true
94 return pe
95}
96
// isIdentOrNumberChar reports whether c may appear in a number or identifier
// token, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	letter := ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
	digit := '0' <= c && c <= '9'
	punct := c == '-' || c == '+' || c == '.' || c == '_'
	return letter || digit || punct
}
111
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
119
120func (p *textParser) skipWhitespace() {
121 i := 0
122 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
123 if p.s[i] == '#' {
124 // comment; skip to end of line or input
125 for i < len(p.s) && p.s[i] != '\n' {
126 i++
127 }
128 if i == len(p.s) {
129 break
130 }
131 }
132 if p.s[i] == '\n' {
133 p.line++
134 }
135 i++
136 }
137 p.offset += i
138 p.s = p.s[i:len(p.s)]
139 if len(p.s) == 0 {
140 p.done = true
141 }
142}
143
144func (p *textParser) advance() {
145 // Skip whitespace
146 p.skipWhitespace()
147 if p.done {
148 return
149 }
150
151 // Start of non-whitespace
152 p.cur.err = nil
153 p.cur.offset, p.cur.line = p.offset, p.line
154 p.cur.unquoted = ""
155 switch p.s[0] {
156 case '<', '>', '{', '}', ':':
157 // Single symbol
158 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
159 case '"':
160 // Quoted string
161 i := 1
162 for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
163 if p.s[i] == '\\' && i+1 < len(p.s) {
164 // skip escaped char
165 i++
166 }
167 i++
168 }
169 if i >= len(p.s) || p.s[i] != '"' {
Rob Piked6420b82011-04-13 16:37:04 -0700170 p.errorf("unmatched quote")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700171 return
172 }
173 // TODO: Should be UnquoteC.
174 unq, err := strconv.Unquote(p.s[0 : i+1])
175 if err != nil {
Rob Piked6420b82011-04-13 16:37:04 -0700176 p.errorf("invalid quoted string %v", p.s[0:i+1])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700177 return
178 }
179 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
180 p.cur.unquoted = unq
181 default:
182 i := 0
183 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
184 i++
185 }
186 if i == 0 {
Rob Piked6420b82011-04-13 16:37:04 -0700187 p.errorf("unexpected byte %#x", p.s[0])
Rob Pikeaaa3a622010-03-20 22:32:34 -0700188 return
189 }
190 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
191 }
192 p.offset += len(p.cur.value)
193}
194
195// Back off the parser by one token. Can only be done between calls to next().
196// It makes the next advance() a no-op.
197func (p *textParser) back() { p.backed = true }
198
199// Advances the parser and returns the new current token.
200func (p *textParser) next() *token {
201 if p.backed || p.done {
202 p.backed = false
203 return &p.cur
204 }
205 p.advance()
206 if p.done {
207 p.cur.value = ""
208 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
209 // Look for multiple quoted strings separated by whitespace,
210 // and concatenate them.
211 cat := p.cur
212 for {
213 p.skipWhitespace()
214 if p.done || p.s[0] != '"' {
215 break
216 }
217 p.advance()
218 if p.cur.err != nil {
219 return &p.cur
220 }
221 cat.value += " " + p.cur.value
222 cat.unquoted += p.cur.unquoted
223 }
224 p.done = false // parser may have seen EOF, but we want to return cat
225 p.cur = cat
226 }
227 return &p.cur
228}
229
Rob Pikeaaa3a622010-03-20 22:32:34 -0700230// Return an error indicating which required field was not set.
Rob Pike97e934d2011-04-11 12:52:49 -0700231func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
232 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700233 sprops := GetProperties(st)
234 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700235 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700236 continue
237 }
238
239 props := sprops.Prop[i]
240 if props.Required {
Rob Piked6420b82011-04-13 16:37:04 -0700241 return p.errorf("message %v missing required field %q", st, props.OrigName)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700242 }
243 }
Rob Piked6420b82011-04-13 16:37:04 -0700244 return p.errorf("message %v missing required field", st) // should not happen
Rob Pikeaaa3a622010-03-20 22:32:34 -0700245}
246
247// Returns the index in the struct for the named field, as well as the parsed tag properties.
Rob Pike97e934d2011-04-11 12:52:49 -0700248func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700249 sprops := GetProperties(st)
David Symonds79eae332010-10-16 11:33:20 +1100250 i, ok := sprops.origNames[name]
251 if ok {
252 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700253 }
254 return -1, nil, false
255}
256
Rob Pike97e934d2011-04-11 12:52:49 -0700257func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
258 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700259 reqCount := GetProperties(st).reqCount
260 // A struct is a sequence of "name: value", terminated by one of
261 // '>' or '}', or the end of the input.
262 for {
263 tok := p.next()
264 if tok.err != nil {
265 return tok.err
266 }
267 if tok.value == terminator {
268 break
269 }
270
271 fi, props, ok := structFieldByName(st, tok.value)
272 if !ok {
Rob Piked6420b82011-04-13 16:37:04 -0700273 return p.errorf("unknown field name %q in %v", tok.value, st)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700274 }
275
276 // Check that it's not already set if it's not a repeated field.
Rob Pike97e934d2011-04-11 12:52:49 -0700277 if !props.Repeated && !isNil(sv.Field(fi)) {
Rob Piked6420b82011-04-13 16:37:04 -0700278 return p.errorf("non-repeated field %q was repeated", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700279 }
280
281 tok = p.next()
282 if tok.err != nil {
283 return tok.err
284 }
285 if tok.value != ":" {
286 // Colon is optional when the field is a group or message.
287 needColon := true
288 switch props.Wire {
289 case "group":
290 needColon = false
291 case "bytes":
292 // A "bytes" field is either a message, a string, or a repeated field;
293 // those three become *T, *string and []T respectively, so we can check for
294 // this field being a pointer to a non-string.
295 typ := st.Field(fi).Type
David Symondsa9cda212011-04-15 01:23:17 -0700296 if typ.Kind() == reflect.Ptr {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700297 // *T or *string
David Symondsa9cda212011-04-15 01:23:17 -0700298 if typ.Elem().Kind() == reflect.String {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700299 break
300 }
David Symondsa9cda212011-04-15 01:23:17 -0700301 } else if typ.Kind() == reflect.Slice {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700302 // []T or []*T
David Symondsa9cda212011-04-15 01:23:17 -0700303 if typ.Elem().Kind() != reflect.Ptr {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700304 break
305 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700306 }
307 needColon = false
308 }
309 if needColon {
Rob Piked6420b82011-04-13 16:37:04 -0700310 return p.errorf("expected ':', found %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700311 }
312 p.back()
313 }
314
315 // Parse into the field.
316 if err := p.readAny(sv.Field(fi), props); err != nil {
317 return err
318 }
319
320 if props.Required {
321 reqCount--
322 }
323 }
324
325 if reqCount > 0 {
326 return p.missingRequiredFieldError(sv)
327 }
328 return nil
329}
330
// Bounds used to range-check values parsed for 32-bit integer fields.
const (
	minInt32  = -(1 << 31)
	maxInt32  = (1 << 31) - 1
	maxUint32 = (1 << 32) - 1
)
336
337func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
338 tok := p.next()
339 if tok.err != nil {
340 return tok.err
341 }
342 if tok.value == "" {
Rob Piked6420b82011-04-13 16:37:04 -0700343 return p.errorf("unexpected EOF")
Rob Pikeaaa3a622010-03-20 22:32:34 -0700344 }
345
Rob Pike97e934d2011-04-11 12:52:49 -0700346 switch fv := v; fv.Kind() {
347 case reflect.Slice:
348 at := v.Type()
Rob Pikeab5b8022010-06-21 17:47:58 -0700349 if at.Elem().Kind() == reflect.Uint8 {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700350 // Special case for []byte
351 if tok.value[0] != '"' {
352 // Deliberately written out here, as the error after
353 // this switch statement would write "invalid []byte: ...",
354 // which is not as user-friendly.
Rob Piked6420b82011-04-13 16:37:04 -0700355 return p.errorf("invalid string: %v", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700356 }
357 bytes := []byte(tok.unquoted)
Nigel Tao4ede8452011-04-28 11:27:25 +1000358 fv.Set(reflect.ValueOf(bytes))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700359 return nil
360 }
361 // Repeated field. May already exist.
David Symonds79eae332010-10-16 11:33:20 +1100362 flen := fv.Len()
363 if flen == fv.Cap() {
364 nav := reflect.MakeSlice(at, flen, 2*flen+1)
Rob Pike48fd4a42010-12-14 23:40:41 -0800365 reflect.Copy(nav, fv)
David Symonds79eae332010-10-16 11:33:20 +1100366 fv.Set(nav)
367 }
368 fv.SetLen(flen + 1)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700369
370 // Read one.
371 p.back()
David Symondsef8f0e82011-10-13 12:57:34 +1100372 return p.readAny(fv.Index(flen), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700373 case reflect.Bool:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700374 // Either "true", "false", 1 or 0.
375 switch tok.value {
376 case "true", "1":
Rob Pike97e934d2011-04-11 12:52:49 -0700377 fv.SetBool(true)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700378 return nil
379 case "false", "0":
Rob Pike97e934d2011-04-11 12:52:49 -0700380 fv.SetBool(false)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700381 return nil
382 }
Rob Pike97e934d2011-04-11 12:52:49 -0700383 case reflect.Float32, reflect.Float64:
Rob Pikeab5b8022010-06-21 17:47:58 -0700384 if f, err := strconv.AtofN(tok.value, fv.Type().Bits()); err == nil {
Rob Pike97e934d2011-04-11 12:52:49 -0700385 fv.SetFloat(f)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700386 return nil
387 }
Rob Pike19b2dbb2011-04-11 16:49:15 -0700388 case reflect.Int32:
389 if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
390 fv.SetInt(x)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700391 return nil
Rob Pike19b2dbb2011-04-11 16:49:15 -0700392 }
393 if len(props.Enum) == 0 {
394 break
395 }
396 m, ok := enumValueMaps[props.Enum]
397 if !ok {
398 break
399 }
400 x, ok := m[tok.value]
401 if !ok {
402 break
403 }
404 fv.SetInt(int64(x))
405 return nil
406 case reflect.Int64:
407 if x, err := strconv.Atoi64(tok.value); err == nil {
408 fv.SetInt(x)
409 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700410 }
Rob Pike97e934d2011-04-11 12:52:49 -0700411 case reflect.Ptr:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700412 // A basic field (indirected through pointer), or a repeated message/group
413 p.back()
Rob Pikeccd260c2011-04-18 13:13:04 -0700414 fv.Set(reflect.New(fv.Type().Elem()))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700415 return p.readAny(fv.Elem(), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700416 case reflect.String:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700417 if tok.value[0] == '"' {
Rob Pike97e934d2011-04-11 12:52:49 -0700418 fv.SetString(tok.unquoted)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700419 return nil
420 }
Rob Pike97e934d2011-04-11 12:52:49 -0700421 case reflect.Struct:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700422 var terminator string
423 switch tok.value {
424 case "{":
425 terminator = "}"
426 case "<":
427 terminator = ">"
428 default:
Rob Piked6420b82011-04-13 16:37:04 -0700429 return p.errorf("expected '{' or '<', found %q", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700430 }
431 return p.readStruct(fv, terminator)
Rob Pike19b2dbb2011-04-11 16:49:15 -0700432 case reflect.Uint32:
433 if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
434 fv.SetUint(uint64(x))
435 return nil
436 }
437 case reflect.Uint64:
438 if x, err := strconv.Atoui64(tok.value); err == nil {
439 fv.SetUint(x)
440 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700441 }
442 }
Rob Piked6420b82011-04-13 16:37:04 -0700443 return p.errorf("invalid %v: %v", v.Type(), tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700444}
445
446var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
447
448// UnmarshalText reads a protobuffer in Text format.
449func UnmarshalText(s string, pb interface{}) os.Error {
Nigel Tao4ede8452011-04-28 11:27:25 +1000450 v := reflect.ValueOf(pb)
David Symondsa9cda212011-04-15 01:23:17 -0700451 if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700452 return notPtrStruct
453 }
David Symondsa9cda212011-04-15 01:23:17 -0700454 if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700455 return pe
456 }
457 return nil
458}