| // Go support for Protocol Buffers - Google's data interchange format |
| // |
| // Copyright 2010 Google Inc. All rights reserved. |
| // http://code.google.com/p/goprotobuf/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| package proto |
| |
| // Functions for parsing the Text protocol buffer format. |
| // TODO: |
| // - groups. |
| |
| import ( |
| "fmt" |
| "os" |
| "reflect" |
| "strconv" |
| ) |
| |
// ParseError satisfies the os.Error interface. It carries a message together
// with the position in the input at which the problem was detected.
type ParseError struct {
	Message string // human-readable description of the problem
	Line    int    // 1-based line number
	Offset  int    // 0-based byte offset from start of input
}

// String formats the error as "line N: message". When the error is on the
// first line, the byte offset is included as well ("line 1.OFFSET: message").
func (p *ParseError) String() string {
	if p.Line != 1 {
		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
	}
	// The offset is only shown for errors on the first line.
	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
}
| |
| type token struct { |
| value string |
| err *ParseError |
| line int // line number |
| offset int // byte number from start of input, not start of line |
| unquoted string // the unquoted version of value, if it was a quoted string |
| } |
| |
| func (t *token) String() string { |
| if t.err == nil { |
| return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset) |
| } |
| return fmt.Sprintf("parse error: %v", t.err) |
| } |
| |
| type textParser struct { |
| s string // remaining input |
| done bool // whether the parsing is finished (success or error) |
| backed bool // whether back() was called |
| offset, line int |
| cur token |
| } |
| |
| func newTextParser(s string) *textParser { |
| p := new(textParser) |
| p.s = s |
| p.line = 1 |
| p.cur.line = 1 |
| return p |
| } |
| |
| func (p *textParser) error(format string, a ...interface{}) *ParseError { |
| pe := &ParseError{fmt.Sprintf(format, a), p.cur.line, p.cur.offset} |
| p.cur.err = pe |
| p.done = true |
| return pe |
| } |
| |
// isIdentOrNumberChar reports whether c may appear in an identifier or a
// number, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	if 'a' <= c && c <= 'z' {
		return true
	}
	if 'A' <= c && c <= 'Z' {
		return true
	}
	return c == '-' || c == '+' || c == '.' || c == '_'
}
| |
// isWhitespace reports whether c is a space, tab, newline or carriage return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
| |
| func (p *textParser) skipWhitespace() { |
| i := 0 |
| for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') { |
| if p.s[i] == '#' { |
| // comment; skip to end of line or input |
| for i < len(p.s) && p.s[i] != '\n' { |
| i++ |
| } |
| if i == len(p.s) { |
| break |
| } |
| } |
| if p.s[i] == '\n' { |
| p.line++ |
| } |
| i++ |
| } |
| p.offset += i |
| p.s = p.s[i:len(p.s)] |
| if len(p.s) == 0 { |
| p.done = true |
| } |
| } |
| |
| func (p *textParser) advance() { |
| // Skip whitespace |
| p.skipWhitespace() |
| if p.done { |
| return |
| } |
| |
| // Start of non-whitespace |
| p.cur.err = nil |
| p.cur.offset, p.cur.line = p.offset, p.line |
| p.cur.unquoted = "" |
| switch p.s[0] { |
| case '<', '>', '{', '}', ':': |
| // Single symbol |
| p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)] |
| case '"': |
| // Quoted string |
| i := 1 |
| for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' { |
| if p.s[i] == '\\' && i+1 < len(p.s) { |
| // skip escaped char |
| i++ |
| } |
| i++ |
| } |
| if i >= len(p.s) || p.s[i] != '"' { |
| p.error("unmatched quote") |
| return |
| } |
| // TODO: Should be UnquoteC. |
| unq, err := strconv.Unquote(p.s[0 : i+1]) |
| if err != nil { |
| p.error("invalid quoted string %v", p.s[0:i+1]) |
| return |
| } |
| p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)] |
| p.cur.unquoted = unq |
| default: |
| i := 0 |
| for i < len(p.s) && isIdentOrNumberChar(p.s[i]) { |
| i++ |
| } |
| if i == 0 { |
| p.error("unexpected byte %#x", p.s[0]) |
| return |
| } |
| p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)] |
| } |
| p.offset += len(p.cur.value) |
| } |
| |
| // Back off the parser by one token. Can only be done between calls to next(). |
| // It makes the next advance() a no-op. |
| func (p *textParser) back() { p.backed = true } |
| |
| // Advances the parser and returns the new current token. |
| func (p *textParser) next() *token { |
| if p.backed || p.done { |
| p.backed = false |
| return &p.cur |
| } |
| p.advance() |
| if p.done { |
| p.cur.value = "" |
| } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' { |
| // Look for multiple quoted strings separated by whitespace, |
| // and concatenate them. |
| cat := p.cur |
| for { |
| p.skipWhitespace() |
| if p.done || p.s[0] != '"' { |
| break |
| } |
| p.advance() |
| if p.cur.err != nil { |
| return &p.cur |
| } |
| cat.value += " " + p.cur.value |
| cat.unquoted += p.cur.unquoted |
| } |
| p.done = false // parser may have seen EOF, but we want to return cat |
| p.cur = cat |
| } |
| return &p.cur |
| } |
| |
// nillable is implemented by reflected values that can report whether they
// are nil. The parser uses it to tell whether a struct field has been set.
type nillable interface {
	// IsNil reports whether the underlying value is nil.
	IsNil() bool
}
| |
| // Return an error indicating which required field was not set. |
| func (p *textParser) missingRequiredFieldError(sv *reflect.StructValue) *ParseError { |
| st := sv.Type().(*reflect.StructType) |
| sprops := GetProperties(st) |
| for i := 0; i < st.NumField(); i++ { |
| // All protocol buffer fields are nillable, but let's be careful. |
| nfv, ok := sv.Field(i).(nillable) |
| if !ok || !nfv.IsNil() { |
| continue |
| } |
| |
| props := sprops.Prop[i] |
| if props.Required { |
| return p.error("message %v missing required field %q", st, props.OrigName) |
| } |
| } |
| return p.error("message %v missing required field", st) // should not happen |
| } |
| |
| // Returns the index in the struct for the named field, as well as the parsed tag properties. |
| func structFieldByName(st *reflect.StructType, name string) (int, *Properties, bool) { |
| sprops := GetProperties(st) |
| for i := 0; i < st.NumField(); i++ { |
| props := sprops.Prop[i] |
| if props.OrigName == name { |
| return i, props, true |
| } |
| } |
| return -1, nil, false |
| } |
| |
| func (p *textParser) readStruct(sv *reflect.StructValue, terminator string) *ParseError { |
| st := sv.Type().(*reflect.StructType) |
| reqCount := GetProperties(st).reqCount |
| // A struct is a sequence of "name: value", terminated by one of |
| // '>' or '}', or the end of the input. |
| for { |
| tok := p.next() |
| if tok.err != nil { |
| return tok.err |
| } |
| if tok.value == terminator { |
| break |
| } |
| |
| fi, props, ok := structFieldByName(st, tok.value) |
| if !ok { |
| return p.error("unknown field name %q in %v", tok.value, st) |
| } |
| |
| // Check that it's not already set if it's not a repeated field. |
| if !props.Repeated { |
| if nfv, ok := sv.Field(fi).(nillable); ok && !nfv.IsNil() { |
| return p.error("non-repeated field %q was repeated", tok.value) |
| } |
| } |
| |
| tok = p.next() |
| if tok.err != nil { |
| return tok.err |
| } |
| if tok.value != ":" { |
| // Colon is optional when the field is a group or message. |
| needColon := true |
| switch props.Wire { |
| case "group": |
| needColon = false |
| case "bytes": |
| // A "bytes" field is either a message, a string, or a repeated field; |
| // those three become *T, *string and []T respectively, so we can check for |
| // this field being a pointer to a non-string. |
| typ := st.Field(fi).Type |
| pt, ok := typ.(*reflect.PtrType) |
| if !ok { |
| break |
| } |
| _, ok = pt.Elem().(*reflect.StringType) |
| if ok { |
| break |
| } |
| needColon = false |
| } |
| if needColon { |
| return p.error("expected ':', found %q", tok.value) |
| } |
| p.back() |
| } |
| |
| // Parse into the field. |
| if err := p.readAny(sv.Field(fi), props); err != nil { |
| return err |
| } |
| |
| if props.Required { |
| reqCount-- |
| } |
| } |
| |
| if reqCount > 0 { |
| return p.missingRequiredFieldError(sv) |
| } |
| return nil |
| } |
| |
// Bounds used to range-check parsed 32-bit integers.
const (
	minInt32  = -(1 << 31)    // smallest int32
	maxInt32  = (1 << 31) - 1 // largest int32
	maxUint32 = (1 << 32) - 1 // largest uint32
)
| |
| func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError { |
| tok := p.next() |
| if tok.err != nil { |
| return tok.err |
| } |
| if tok.value == "" { |
| return p.error("unexpected EOF") |
| } |
| |
| switch fv := v.(type) { |
| case *reflect.SliceValue: |
| at := v.Type().(*reflect.SliceType) |
| if _, ok := at.Elem().(*reflect.Uint8Type); ok { |
| // Special case for []byte |
| if tok.value[0] != '"' { |
| // Deliberately written out here, as the error after |
| // this switch statement would write "invalid []byte: ...", |
| // which is not as user-friendly. |
| return p.error("invalid string: %v", tok.value) |
| } |
| bytes := []byte(tok.unquoted) |
| fv.Set(reflect.NewValue(bytes).(*reflect.SliceValue)) |
| return nil |
| } |
| // Repeated field. May already exist. |
| cnt := fv.Len() |
| nav := reflect.MakeSlice(at, cnt, cnt+1) |
| reflect.ArrayCopy(nav, fv) |
| fv.Set(nav) |
| fv.SetLen(cnt + 1) |
| |
| // Read one. |
| p.back() |
| return p.readAny(fv.Elem(cnt), nil) // TODO: pass properties? |
| case *reflect.BoolValue: |
| // Either "true", "false", 1 or 0. |
| switch tok.value { |
| case "true", "1": |
| fv.Set(true) |
| return nil |
| case "false", "0": |
| fv.Set(false) |
| return nil |
| } |
| case *reflect.Float32Value: |
| if f, err := strconv.Atof32(tok.value); err == nil { |
| fv.Set(f) |
| return nil |
| } |
| case *reflect.Float64Value: |
| if f, err := strconv.Atof64(tok.value); err == nil { |
| fv.Set(f) |
| return nil |
| } |
| case *reflect.Int32Value: |
| if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 { |
| fv.Set(int32(x)) |
| return nil |
| } |
| if len(props.Enum) == 0 { |
| break |
| } |
| m, ok := enumValueMaps[props.Enum] |
| if !ok { |
| break |
| } |
| x, ok := m[tok.value] |
| if !ok { |
| break |
| } |
| fv.Set(x) |
| return nil |
| case *reflect.Int64Value: |
| if x, err := strconv.Atoi64(tok.value); err == nil { |
| fv.Set(x) |
| return nil |
| } |
| case *reflect.PtrValue: |
| // A basic field (indirected through pointer), or a repeated message/group |
| p.back() |
| fv.PointTo(reflect.MakeZero(fv.Type().(*reflect.PtrType).Elem())) |
| return p.readAny(fv.Elem(), props) |
| case *reflect.StringValue: |
| if tok.value[0] == '"' { |
| fv.Set(tok.unquoted) |
| return nil |
| } |
| case *reflect.StructValue: |
| var terminator string |
| switch tok.value { |
| case "{": |
| terminator = "}" |
| case "<": |
| terminator = ">" |
| default: |
| return p.error("expected '{' or '<', found %q", tok.value) |
| } |
| return p.readStruct(fv, terminator) |
| case *reflect.Uint32Value: |
| if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 { |
| fv.Set(uint32(x)) |
| return nil |
| } |
| case *reflect.Uint64Value: |
| if x, err := strconv.Atoui64(tok.value); err == nil { |
| fv.Set(x) |
| return nil |
| } |
| } |
| return p.error("invalid %v: %v", v.Type(), tok.value) |
| } |
| |
| var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0} |
| |
| // UnmarshalText reads a protobuffer in Text format. |
| func UnmarshalText(s string, pb interface{}) os.Error { |
| pv, ok := reflect.NewValue(pb).(*reflect.PtrValue) |
| if !ok { |
| return notPtrStruct |
| } |
| sv, ok := pv.Elem().(*reflect.StructValue) |
| if !ok { |
| return notPtrStruct |
| } |
| if pe := newTextParser(s).readStruct(sv, ""); pe != nil { |
| return pe |
| } |
| return nil |
| } |