blob: 57174ee5d50d570a0d6fa5b8873eb830c0994c37 [file] [log] [blame]
// Go support for Protocol Buffers - Google's data interchange format
//
// Copyright 2010 Google Inc.  All rights reserved.
// http://code.google.com/p/goprotobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32package proto
33
// Functions for parsing the Text protocol buffer format.
// TODO:
//     - message sets, groups.
Rob Pikeaaa3a622010-03-20 22:32:34 -070037
38import (
39 "fmt"
40 "os"
41 "reflect"
42 "strconv"
43)
44
// ParseError describes a failure to parse text-format input, together with
// the position at which it was detected. It satisfies the os.Error interface.
type ParseError struct {
	// Message is the human-readable description of the problem.
	Message string
	// Line is the 1-based line number.
	Line int
	// Offset is the 0-based byte offset from the start of the input.
	Offset int
}
51
52func (p *ParseError) String() string {
53 if p.Line == 1 {
54 // show offset only for first line
55 return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
56 }
57 return fmt.Sprintf("line %d: %v", p.Line, p.Message)
58}
59
60type token struct {
61 value string
62 err *ParseError
63 line int // line number
64 offset int // byte number from start of input, not start of line
65 unquoted string // the unquoted version of value, if it was a quoted string
66}
67
68func (t *token) String() string {
69 if t.err == nil {
70 return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
71 }
72 return fmt.Sprintf("parse error: %v", t.err)
73}
74
75type textParser struct {
76 s string // remaining input
77 done bool // whether the parsing is finished (success or error)
78 backed bool // whether back() was called
79 offset, line int
80 cur token
81}
82
83func newTextParser(s string) *textParser {
84 p := new(textParser)
85 p.s = s
86 p.line = 1
87 p.cur.line = 1
88 return p
89}
90
91func (p *textParser) error(format string, a ...interface{}) *ParseError {
Rob Pikead7cac72010-09-29 12:29:26 -070092 pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
Rob Pikeaaa3a622010-03-20 22:32:34 -070093 p.cur.err = pe
94 p.done = true
95 return pe
96}
97
// isIdentOrNumberChar reports whether c may appear in a number or an
// identifier, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	letter := ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
	digit := '0' <= c && c <= '9'
	punct := c == '-' || c == '+' || c == '.' || c == '_'
	return letter || digit || punct
}
112
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
120
121func (p *textParser) skipWhitespace() {
122 i := 0
123 for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
124 if p.s[i] == '#' {
125 // comment; skip to end of line or input
126 for i < len(p.s) && p.s[i] != '\n' {
127 i++
128 }
129 if i == len(p.s) {
130 break
131 }
132 }
133 if p.s[i] == '\n' {
134 p.line++
135 }
136 i++
137 }
138 p.offset += i
139 p.s = p.s[i:len(p.s)]
140 if len(p.s) == 0 {
141 p.done = true
142 }
143}
144
145func (p *textParser) advance() {
146 // Skip whitespace
147 p.skipWhitespace()
148 if p.done {
149 return
150 }
151
152 // Start of non-whitespace
153 p.cur.err = nil
154 p.cur.offset, p.cur.line = p.offset, p.line
155 p.cur.unquoted = ""
156 switch p.s[0] {
157 case '<', '>', '{', '}', ':':
158 // Single symbol
159 p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
160 case '"':
161 // Quoted string
162 i := 1
163 for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
164 if p.s[i] == '\\' && i+1 < len(p.s) {
165 // skip escaped char
166 i++
167 }
168 i++
169 }
170 if i >= len(p.s) || p.s[i] != '"' {
171 p.error("unmatched quote")
172 return
173 }
174 // TODO: Should be UnquoteC.
175 unq, err := strconv.Unquote(p.s[0 : i+1])
176 if err != nil {
177 p.error("invalid quoted string %v", p.s[0:i+1])
178 return
179 }
180 p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
181 p.cur.unquoted = unq
182 default:
183 i := 0
184 for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
185 i++
186 }
187 if i == 0 {
188 p.error("unexpected byte %#x", p.s[0])
189 return
190 }
191 p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
192 }
193 p.offset += len(p.cur.value)
194}
195
196// Back off the parser by one token. Can only be done between calls to next().
197// It makes the next advance() a no-op.
198func (p *textParser) back() { p.backed = true }
199
200// Advances the parser and returns the new current token.
201func (p *textParser) next() *token {
202 if p.backed || p.done {
203 p.backed = false
204 return &p.cur
205 }
206 p.advance()
207 if p.done {
208 p.cur.value = ""
209 } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
210 // Look for multiple quoted strings separated by whitespace,
211 // and concatenate them.
212 cat := p.cur
213 for {
214 p.skipWhitespace()
215 if p.done || p.s[0] != '"' {
216 break
217 }
218 p.advance()
219 if p.cur.err != nil {
220 return &p.cur
221 }
222 cat.value += " " + p.cur.value
223 cat.unquoted += p.cur.unquoted
224 }
225 p.done = false // parser may have seen EOF, but we want to return cat
226 p.cur = cat
227 }
228 return &p.cur
229}
230
Rob Pikeaaa3a622010-03-20 22:32:34 -0700231// Return an error indicating which required field was not set.
Rob Pike97e934d2011-04-11 12:52:49 -0700232func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
233 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700234 sprops := GetProperties(st)
235 for i := 0; i < st.NumField(); i++ {
Rob Pike97e934d2011-04-11 12:52:49 -0700236 if !isNil(sv.Field(i)) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700237 continue
238 }
239
240 props := sprops.Prop[i]
241 if props.Required {
242 return p.error("message %v missing required field %q", st, props.OrigName)
243 }
244 }
245 return p.error("message %v missing required field", st) // should not happen
246}
247
248// Returns the index in the struct for the named field, as well as the parsed tag properties.
Rob Pike97e934d2011-04-11 12:52:49 -0700249func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700250 sprops := GetProperties(st)
David Symonds79eae332010-10-16 11:33:20 +1100251 i, ok := sprops.origNames[name]
252 if ok {
253 return i, sprops.Prop[i], true
Rob Pikeaaa3a622010-03-20 22:32:34 -0700254 }
255 return -1, nil, false
256}
257
Rob Pike97e934d2011-04-11 12:52:49 -0700258func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
259 st := sv.Type()
Rob Pikeaaa3a622010-03-20 22:32:34 -0700260 reqCount := GetProperties(st).reqCount
261 // A struct is a sequence of "name: value", terminated by one of
262 // '>' or '}', or the end of the input.
263 for {
264 tok := p.next()
265 if tok.err != nil {
266 return tok.err
267 }
268 if tok.value == terminator {
269 break
270 }
271
272 fi, props, ok := structFieldByName(st, tok.value)
273 if !ok {
274 return p.error("unknown field name %q in %v", tok.value, st)
275 }
276
277 // Check that it's not already set if it's not a repeated field.
Rob Pike97e934d2011-04-11 12:52:49 -0700278 if !props.Repeated && !isNil(sv.Field(fi)) {
279 return p.error("non-repeated field %q was repeated", tok.value)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700280 }
281
282 tok = p.next()
283 if tok.err != nil {
284 return tok.err
285 }
286 if tok.value != ":" {
287 // Colon is optional when the field is a group or message.
288 needColon := true
289 switch props.Wire {
290 case "group":
291 needColon = false
292 case "bytes":
293 // A "bytes" field is either a message, a string, or a repeated field;
294 // those three become *T, *string and []T respectively, so we can check for
295 // this field being a pointer to a non-string.
296 typ := st.Field(fi).Type
Rob Pike97e934d2011-04-11 12:52:49 -0700297 if pt := typ; pt.Kind() == reflect.Ptr {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700298 // *T or *string
Rob Pike97e934d2011-04-11 12:52:49 -0700299 if pt.Elem().Kind() == reflect.String {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700300 break
301 }
Rob Pike97e934d2011-04-11 12:52:49 -0700302 } else if st := typ; st.Kind() == reflect.Slice {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700303 // []T or []*T
Rob Pike97e934d2011-04-11 12:52:49 -0700304 if st.Elem().Kind() != reflect.Ptr {
Rob Pikeaaf695a2010-06-22 15:51:21 -0700305 break
306 }
Rob Pikeaaa3a622010-03-20 22:32:34 -0700307 }
308 needColon = false
309 }
310 if needColon {
311 return p.error("expected ':', found %q", tok.value)
312 }
313 p.back()
314 }
315
316 // Parse into the field.
317 if err := p.readAny(sv.Field(fi), props); err != nil {
318 return err
319 }
320
321 if props.Required {
322 reqCount--
323 }
324 }
325
326 if reqCount > 0 {
327 return p.missingRequiredFieldError(sv)
328 }
329 return nil
330}
331
// Bounds used to range-check values parsed into 32-bit integer fields.
const (
	minInt32  = -(1 << 31)    // smallest value accepted for an int32 field
	maxInt32  = (1 << 31) - 1 // largest value accepted for an int32 field
	maxUint32 = (1 << 32) - 1 // largest value accepted for a uint32 field
)
337
338func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
339 tok := p.next()
340 if tok.err != nil {
341 return tok.err
342 }
343 if tok.value == "" {
344 return p.error("unexpected EOF")
345 }
346
Rob Pike97e934d2011-04-11 12:52:49 -0700347 switch fv := v; fv.Kind() {
348 case reflect.Slice:
349 at := v.Type()
Rob Pikeab5b8022010-06-21 17:47:58 -0700350 if at.Elem().Kind() == reflect.Uint8 {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700351 // Special case for []byte
352 if tok.value[0] != '"' {
353 // Deliberately written out here, as the error after
354 // this switch statement would write "invalid []byte: ...",
355 // which is not as user-friendly.
356 return p.error("invalid string: %v", tok.value)
357 }
358 bytes := []byte(tok.unquoted)
Rob Pike97e934d2011-04-11 12:52:49 -0700359 fv.Set(reflect.NewValue(bytes))
Rob Pikeaaa3a622010-03-20 22:32:34 -0700360 return nil
361 }
362 // Repeated field. May already exist.
David Symonds79eae332010-10-16 11:33:20 +1100363 flen := fv.Len()
364 if flen == fv.Cap() {
365 nav := reflect.MakeSlice(at, flen, 2*flen+1)
Rob Pike48fd4a42010-12-14 23:40:41 -0800366 reflect.Copy(nav, fv)
David Symonds79eae332010-10-16 11:33:20 +1100367 fv.Set(nav)
368 }
369 fv.SetLen(flen + 1)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700370
371 // Read one.
372 p.back()
Rob Pike97e934d2011-04-11 12:52:49 -0700373 return p.readAny(fv.Index(flen), nil) // TODO: pass properties?
374 case reflect.Bool:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700375 // Either "true", "false", 1 or 0.
376 switch tok.value {
377 case "true", "1":
Rob Pike97e934d2011-04-11 12:52:49 -0700378 fv.SetBool(true)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700379 return nil
380 case "false", "0":
Rob Pike97e934d2011-04-11 12:52:49 -0700381 fv.SetBool(false)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700382 return nil
383 }
Rob Pike97e934d2011-04-11 12:52:49 -0700384 case reflect.Float32, reflect.Float64:
Rob Pikeab5b8022010-06-21 17:47:58 -0700385 if f, err := strconv.AtofN(tok.value, fv.Type().Bits()); err == nil {
Rob Pike97e934d2011-04-11 12:52:49 -0700386 fv.SetFloat(f)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700387 return nil
388 }
Rob Pike19b2dbb2011-04-11 16:49:15 -0700389 case reflect.Int32:
390 if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
391 fv.SetInt(x)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700392 return nil
Rob Pike19b2dbb2011-04-11 16:49:15 -0700393 }
394 if len(props.Enum) == 0 {
395 break
396 }
397 m, ok := enumValueMaps[props.Enum]
398 if !ok {
399 break
400 }
401 x, ok := m[tok.value]
402 if !ok {
403 break
404 }
405 fv.SetInt(int64(x))
406 return nil
407 case reflect.Int64:
408 if x, err := strconv.Atoi64(tok.value); err == nil {
409 fv.SetInt(x)
410 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700411 }
Rob Pike97e934d2011-04-11 12:52:49 -0700412 case reflect.Ptr:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700413 // A basic field (indirected through pointer), or a repeated message/group
414 p.back()
Rob Pike97e934d2011-04-11 12:52:49 -0700415 fv.Set(reflect.Zero(fv.Type().Elem()).Addr())
Rob Pikeaaa3a622010-03-20 22:32:34 -0700416 return p.readAny(fv.Elem(), props)
Rob Pike97e934d2011-04-11 12:52:49 -0700417 case reflect.String:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700418 if tok.value[0] == '"' {
Rob Pike97e934d2011-04-11 12:52:49 -0700419 fv.SetString(tok.unquoted)
Rob Pikeaaa3a622010-03-20 22:32:34 -0700420 return nil
421 }
Rob Pike97e934d2011-04-11 12:52:49 -0700422 case reflect.Struct:
Rob Pikeaaa3a622010-03-20 22:32:34 -0700423 var terminator string
424 switch tok.value {
425 case "{":
426 terminator = "}"
427 case "<":
428 terminator = ">"
429 default:
430 return p.error("expected '{' or '<', found %q", tok.value)
431 }
432 return p.readStruct(fv, terminator)
Rob Pike19b2dbb2011-04-11 16:49:15 -0700433 case reflect.Uint32:
434 if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
435 fv.SetUint(uint64(x))
436 return nil
437 }
438 case reflect.Uint64:
439 if x, err := strconv.Atoui64(tok.value); err == nil {
440 fv.SetUint(x)
441 return nil
Rob Pikeaaa3a622010-03-20 22:32:34 -0700442 }
443 }
444 return p.error("invalid %v: %v", v.Type(), tok.value)
445}
446
447var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
448
449// UnmarshalText reads a protobuffer in Text format.
450func UnmarshalText(s string, pb interface{}) os.Error {
Rob Pike97e934d2011-04-11 12:52:49 -0700451 pv := reflect.NewValue(pb)
452 ok := pv.Kind() == reflect.Ptr
Rob Pikeaaa3a622010-03-20 22:32:34 -0700453 if !ok {
454 return notPtrStruct
455 }
Rob Pike97e934d2011-04-11 12:52:49 -0700456 sv := pv.Elem()
457 if sv.Kind() != reflect.Struct {
Rob Pikeaaa3a622010-03-20 22:32:34 -0700458 return notPtrStruct
459 }
460 if pe := newTextParser(s).readStruct(sv, ""); pe != nil {
461 return pe
462 }
463 return nil
464}