Go support for protocol buffers.
Consists of a compiler plugin and the support library, all written in Go.
This is a complete implementation except for:
  - Extensions in the plugin
    - coming soon
    - support is already in the library
  - Services (RPC)
    - needs an external definition to honor before supporting.
  - Insertion points in the plugin
    - may come
R=rsc, dsymonds1, ken2
CC=golang-dev
http://codereview.appspot.com/676041
diff --git a/proto/text_parser.go b/proto/text_parser.go
new file mode 100644
index 0000000..e1c0832
--- /dev/null
+++ b/proto/text_parser.go
@@ -0,0 +1,473 @@
+// Go support for Protocol Buffers - Google's data interchange format
+//
+// Copyright 2010 Google Inc. All rights reserved.
+// http://code.google.com/p/goprotobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package proto
+
+// Functions for parsing the Text protocol buffer format.
+// TODO:
+// - groups.
+
+import (
+ "fmt"
+ "os"
+ "reflect"
+ "strconv"
+)
+
+// ParseError describes a failure encountered while parsing the text format.
+// It satisfies the os.Error interface through its String method.
+type ParseError struct {
+ Message string // human-readable description of the problem
+ Line int // 1-based line number
+ Offset int // 0-based byte offset from start of input
+}
+
+// String renders the error as "line N: message". For an error on the first
+// line the byte offset is appended to the line number ("line 1.OFFSET: message").
+func (p *ParseError) String() string {
+	if p.Line != 1 {
+		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
+	}
+	// The offset is only shown for the first line.
+	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
+}
+
+// token is one lexical element read by textParser, together with its
+// position and, if reading it failed, the resulting error.
+type token struct {
+ value string // raw token text; empty at end of input
+ err *ParseError // set when the token could not be read
+ line int // line number
+ offset int // byte number from start of input, not start of line
+ unquoted string // the unquoted version of value, if it was a quoted string
+}
+
+// String formats the token for debugging: the parse error if the token
+// carries one, otherwise the quoted value followed by its line and offset.
+func (t *token) String() string {
+	if t.err != nil {
+		return fmt.Sprintf("parse error: %v", t.err)
+	}
+	return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
+}
+
+// textParser is a tokenizer and recursive-descent parser over a text-format
+// input string. It holds exactly one token of state (cur), which back() can
+// cause to be returned a second time.
+type textParser struct {
+ s string // remaining input
+ done bool // whether the parsing is finished (success or error)
+ backed bool // whether back() was called
+ offset, line int // position of the start of s: bytes consumed so far, and current line
+ cur token // the most recently read token
+}
+
+// newTextParser returns a parser positioned at the start of s. Line numbering,
+// both for the parser and for its (still empty) current token, starts at 1.
+func newTextParser(s string) *textParser {
+	p := &textParser{s: s, line: 1}
+	p.cur.line = 1
+	return p
+}
+
+// error builds a ParseError located at the current token, records it on that
+// token, marks the parser as done, and returns it.
+//
+// NOTE(review): a is handed to Sprintf without "..."; this appears to rely on
+// the toolchain forwarding a function's own variadic parameter unchanged.
+// Confirm against the targeted compiler before altering.
+func (p *textParser) error(format string, a ...interface{}) *ParseError {
+	pe := &ParseError{
+		Message: fmt.Sprintf(format, a),
+		Line:    p.cur.line,
+		Offset:  p.cur.offset,
+	}
+	p.done = true
+	p.cur.err = pe
+	return pe
+}
+
+// isIdentOrNumberChar reports whether c can be part of a number or an
+// identifier, i.e. whether it matches [-+._A-Za-z0-9].
+func isIdentOrNumberChar(c byte) bool {
+	switch c {
+	case '-', '+', '.', '_':
+		return true
+	}
+	isUpper := 'A' <= c && c <= 'Z'
+	isLower := 'a' <= c && c <= 'z'
+	isDigit := '0' <= c && c <= '9'
+	return isUpper || isLower || isDigit
+}
+
+// isWhitespace reports whether c is a space, tab, newline or carriage return.
+func isWhitespace(c byte) bool {
+	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
+}
+
+// skipWhitespace consumes leading whitespace and '#' comments from the
+// remaining input, keeping the line and offset counters up to date. If
+// nothing is left afterwards the parser is marked as done.
+func (p *textParser) skipWhitespace() {
+	n := len(p.s)
+	pos := 0
+	for pos < n {
+		c := p.s[pos]
+		if c == '#' {
+			// Comment: runs to the end of the line or of the input.
+			for pos < n && p.s[pos] != '\n' {
+				pos++
+			}
+			if pos == n {
+				break
+			}
+			c = p.s[pos] // the newline that ended the comment
+		} else if !isWhitespace(c) {
+			break
+		}
+		if c == '\n' {
+			p.line++
+		}
+		pos++
+	}
+	p.offset += pos
+	p.s = p.s[pos:n]
+	if len(p.s) == 0 {
+		p.done = true
+	}
+}
+
+// advance reads the next token from the remaining input into p.cur.
+//
+// Leading whitespace and comments are skipped first; if that exhausts the
+// input, p.cur is left untouched and the function returns with p.done set.
+// Three kinds of token are recognised: the single-byte symbols < > { } :,
+// double-quoted strings, and runs of identifier/number characters. On a
+// malformed token the error is recorded through p.error (which also sets
+// p.done) and neither p.s nor p.offset is moved past it.
+func (p *textParser) advance() {
+ // Skip whitespace
+ p.skipWhitespace()
+ if p.done {
+ return
+ }
+
+ // Start of non-whitespace
+ p.cur.err = nil
+ p.cur.offset, p.cur.line = p.offset, p.line
+ p.cur.unquoted = ""
+ switch p.s[0] {
+ case '<', '>', '{', '}', ':':
+ // Single symbol
+ p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
+ case '"':
+ // Quoted string: scan for the closing quote, stopping early at a raw
+ // newline (a string may not span lines).
+ i := 1
+ for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
+ if p.s[i] == '\\' && i+1 < len(p.s) {
+ // skip escaped char
+ i++
+ }
+ i++
+ }
+ // Either the input ended or a newline was hit before the closing quote.
+ if i >= len(p.s) || p.s[i] != '"' {
+ p.error("unmatched quote")
+ return
+ }
+ // TODO: Should be UnquoteC.
+ unq, err := strconv.Unquote(p.s[0 : i+1])
+ if err != nil {
+ p.error("invalid quoted string %v", p.s[0:i+1])
+ return
+ }
+ // value keeps the surrounding quotes; unquoted holds the decoded text.
+ p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
+ p.cur.unquoted = unq
+ default:
+ // Identifier or number: the longest run of [-+._A-Za-z0-9].
+ i := 0
+ for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
+ i++
+ }
+ if i == 0 {
+ p.error("unexpected byte %#x", p.s[0])
+ return
+ }
+ p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
+ }
+ // Account for the bytes of the token just consumed.
+ p.offset += len(p.cur.value)
+}
+
+// back un-reads the current token, so that the following call to next()
+// returns it again instead of advancing. It may only be used between calls
+// to next().
+func (p *textParser) back() {
+	p.backed = true
+}
+
+// next advances the parser and returns the new current token.
+//
+// If back() was called since the previous next(), or the parser is already
+// done, the current token is returned again without reading more input (and
+// the backed flag is cleared). At end of input the returned token has an
+// empty value. Consecutive quoted strings separated only by whitespace or
+// comments are merged into a single token: their unquoted contents are
+// concatenated directly and their raw values are joined with a space.
+// The returned pointer refers to p.cur, so it is overwritten by later calls.
+func (p *textParser) next() *token {
+ if p.backed || p.done {
+ p.backed = false
+ return &p.cur
+ }
+ p.advance()
+ if p.done {
+ // End of input (or an error recorded on p.cur by advance).
+ p.cur.value = ""
+ } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
+ // Look for multiple quoted strings separated by whitespace,
+ // and concatenate them.
+ cat := p.cur
+ for {
+ p.skipWhitespace()
+ if p.done || p.s[0] != '"' {
+ break
+ }
+ p.advance()
+ if p.cur.err != nil {
+ // A later piece was malformed; report that error token.
+ return &p.cur
+ }
+ cat.value += " " + p.cur.value
+ cat.unquoted += p.cur.unquoted
+ }
+ p.done = false // parser may have seen EOF, but we want to return cat
+ p.cur = cat
+ }
+ return &p.cur
+}
+
+// nillable is implemented by reflect values that can report whether they are
+// nil. The parser uses it to tell whether a struct field has been set yet.
+type nillable interface {
+ IsNil() bool
+}
+
+// missingRequiredFieldError returns (and records on the parser) an error
+// naming the first required field of sv that is still nil. If no such field
+// can be identified, a generic "missing required field" error is returned.
+func (p *textParser) missingRequiredFieldError(sv *reflect.StructValue) *ParseError {
+	st := sv.Type().(*reflect.StructType)
+	sprops := GetProperties(st)
+	for idx, n := 0, st.NumField(); idx < n; idx++ {
+		// All protocol buffer fields are nillable, but let's be careful.
+		field, isNillable := sv.Field(idx).(nillable)
+		if isNillable && field.IsNil() {
+			if fp := sprops.Prop[idx]; fp.Required {
+				return p.error("message %v missing required field %q", st, fp.OrigName)
+			}
+		}
+	}
+	return p.error("message %v missing required field", st) // should not happen
+}
+
+// structFieldByName looks up the field of st whose original (.proto) name is
+// name. It returns the field's index within the struct, its parsed tag
+// properties, and true; or -1, nil and false if no field matches.
+func structFieldByName(st *reflect.StructType, name string) (int, *Properties, bool) {
+	sprops := GetProperties(st)
+	for idx := 0; idx < st.NumField(); idx++ {
+		if candidate := sprops.Prop[idx]; candidate.OrigName == name {
+			return idx, candidate, true
+		}
+	}
+	return -1, nil, false
+}
+
+// readStruct parses a sequence of "name: value" fields into sv until a token
+// equal to terminator is read. The top-level caller passes "" as terminator,
+// which matches the empty-valued token that next() yields at end of input.
+//
+// It returns an error for an unknown field name, a non-repeated field that
+// is given twice, a missing ':' before a scalar value, any error from
+// reading a value, or a required field that was never set.
+func (p *textParser) readStruct(sv *reflect.StructValue, terminator string) *ParseError {
+ st := sv.Type().(*reflect.StructType)
+ // Number of required fields still to be seen; decremented as they are parsed.
+ reqCount := GetProperties(st).reqCount
+ // A struct is a sequence of "name: value", terminated by one of
+ // '>' or '}', or the end of the input.
+ for {
+ tok := p.next()
+ if tok.err != nil {
+ return tok.err
+ }
+ if tok.value == terminator {
+ break
+ }
+
+ fi, props, ok := structFieldByName(st, tok.value)
+ if !ok {
+ return p.error("unknown field name %q in %v", tok.value, st)
+ }
+
+ // Check that it's not already set if it's not a repeated field.
+ if !props.Repeated {
+ if nfv, ok := sv.Field(fi).(nillable); ok && !nfv.IsNil() {
+ return p.error("non-repeated field %q was repeated", tok.value)
+ }
+ }
+
+ tok = p.next()
+ if tok.err != nil {
+ return tok.err
+ }
+ if tok.value != ":" {
+ // Colon is optional when the field is a group or message.
+ needColon := true
+ switch props.Wire {
+ case "group":
+ needColon = false
+ case "bytes":
+ // A "bytes" field is either a message, a string, or a repeated field;
+ // those three become *T, *string and []T respectively, so we can check for
+ // this field being a pointer to a non-string.
+ typ := st.Field(fi).Type
+ pt, ok := typ.(*reflect.PtrType)
+ if !ok {
+ break
+ }
+ _, ok = pt.Elem().(*reflect.StringType)
+ if ok {
+ break
+ }
+ needColon = false
+ }
+ if needColon {
+ return p.error("expected ':', found %q", tok.value)
+ }
+ // The token just read starts the value; hand it back for readAny.
+ p.back()
+ }
+
+ // Parse into the field.
+ if err := p.readAny(sv.Field(fi), props); err != nil {
+ return err
+ }
+
+ if props.Required {
+ reqCount--
+ }
+ }
+
+ if reqCount > 0 {
+ return p.missingRequiredFieldError(sv)
+ }
+ return nil
+}
+
+// Bounds used by readAny to range-check values parsed into 32-bit fields.
+const (
+ minInt32 = -1 << 31
+ maxInt32 = 1<<31 - 1
+ maxUint32 = 1<<32 - 1
+)
+
+// readAny parses a single value from the input into v, dispatching on the
+// reflect kind of v.
+//
+//   - []byte is read from a quoted string.
+//   - Any other slice is a repeated field: the slice is grown by one element
+//     and that element is parsed recursively.
+//   - Pointers are pointed at a fresh zero value, which is parsed recursively.
+//   - Structs are read by readStruct after an opening '{' or '<'.
+//   - Scalars are parsed from the token text; 32-bit integers are range
+//     checked, and an int32 may also be given as an enum value name when
+//     props names an enum registered in enumValueMaps.
+//
+// props may be nil (it is for the elements of a repeated field), in which
+// case enum names cannot be resolved and are reported as invalid values.
+// On failure a *ParseError is returned; nil indicates success.
+func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
+	tok := p.next()
+	if tok.err != nil {
+		return tok.err
+	}
+	if tok.value == "" {
+		return p.error("unexpected EOF")
+	}
+
+	switch fv := v.(type) {
+	case *reflect.SliceValue:
+		at := v.Type().(*reflect.SliceType)
+		if _, ok := at.Elem().(*reflect.Uint8Type); ok {
+			// Special case for []byte
+			if tok.value[0] != '"' {
+				// Deliberately written out here, as the error after
+				// this switch statement would write "invalid []byte: ...",
+				// which is not as user-friendly.
+				return p.error("invalid string: %v", tok.value)
+			}
+			bytes := []byte(tok.unquoted)
+			fv.Set(reflect.NewValue(bytes).(*reflect.SliceValue))
+			return nil
+		}
+		// Repeated field. May already exist.
+		cnt := fv.Len()
+		nav := reflect.MakeSlice(at, cnt, cnt+1)
+		reflect.ArrayCopy(nav, fv)
+		fv.Set(nav)
+		fv.SetLen(cnt + 1)
+
+		// Read one. The token already consumed belongs to the element.
+		p.back()
+		return p.readAny(fv.Elem(cnt), nil) // TODO: pass properties?
+	case *reflect.BoolValue:
+		// Either "true", "false", 1 or 0.
+		switch tok.value {
+		case "true", "1":
+			fv.Set(true)
+			return nil
+		case "false", "0":
+			fv.Set(false)
+			return nil
+		}
+	case *reflect.Float32Value:
+		if f, err := strconv.Atof32(tok.value); err == nil {
+			fv.Set(f)
+			return nil
+		}
+	case *reflect.Float64Value:
+		if f, err := strconv.Atof64(tok.value); err == nil {
+			fv.Set(f)
+			return nil
+		}
+	case *reflect.Int32Value:
+		if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
+			fv.Set(int32(x))
+			return nil
+		}
+		// Not a number in range: try it as an enum value name. props is nil
+		// for elements of a repeated field, so guard against dereferencing it.
+		if props == nil || len(props.Enum) == 0 {
+			break
+		}
+		m, ok := enumValueMaps[props.Enum]
+		if !ok {
+			break
+		}
+		x, ok := m[tok.value]
+		if !ok {
+			break
+		}
+		fv.Set(x)
+		return nil
+	case *reflect.Int64Value:
+		if x, err := strconv.Atoi64(tok.value); err == nil {
+			fv.Set(x)
+			return nil
+		}
+	case *reflect.PtrValue:
+		// A basic field (indirected through pointer), or a repeated message/group
+		p.back()
+		fv.PointTo(reflect.MakeZero(fv.Type().(*reflect.PtrType).Elem()))
+		return p.readAny(fv.Elem(), props)
+	case *reflect.StringValue:
+		if tok.value[0] == '"' {
+			fv.Set(tok.unquoted)
+			return nil
+		}
+	case *reflect.StructValue:
+		var terminator string
+		switch tok.value {
+		case "{":
+			terminator = "}"
+		case "<":
+			terminator = ">"
+		default:
+			return p.error("expected '{' or '<', found %q", tok.value)
+		}
+		return p.readStruct(fv, terminator)
+	case *reflect.Uint32Value:
+		if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
+			fv.Set(uint32(x))
+			return nil
+		}
+	case *reflect.Uint64Value:
+		if x, err := strconv.Atoui64(tok.value); err == nil {
+			fv.Set(x)
+			return nil
+		}
+	}
+	// Every successful parse returned above; anything reaching here is invalid.
+	return p.error("invalid %v: %v", v.Type(), tok.value)
+}
+
+// notPtrStruct is returned by UnmarshalText when its destination argument is
+// not a pointer to a struct.
+var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
+
+// UnmarshalText reads a protobuffer in Text format from s into pb, which
+// must be a pointer to a struct; otherwise notPtrStruct is returned. Parse
+// failures are reported as a *ParseError.
+func UnmarshalText(s string, pb interface{}) os.Error {
+	ptr, isPtr := reflect.NewValue(pb).(*reflect.PtrValue)
+	if !isPtr {
+		return notPtrStruct
+	}
+	target, isStruct := ptr.Elem().(*reflect.StructValue)
+	if !isStruct {
+		return notPtrStruct
+	}
+	parser := newTextParser(s)
+	pe := parser.readStruct(target, "")
+	if pe == nil {
+		// Return a literal nil so the interface result is truly nil rather
+		// than wrapping a nil *ParseError.
+		return nil
+	}
+	return pe
+}