Blame - proto/text_parser.go - platform/external/golang-protobuf

blob: e1c0832e81b1977d87c81d4fd90ec23072501a48 [file] [log] [blame]

Rob Pike	aaa3a62	2010-03-20 22:32:34 -0700	[diff] [blame^]	1	// Go support for Protocol Buffers - Google's data interchange format
				2	//
				3	// Copyright 2010 Google Inc. All rights reserved.
				4	// http://code.google.com/p/goprotobuf/
				5	//
				6	// Redistribution and use in source and binary forms, with or without
				7	// modification, are permitted provided that the following conditions are
				8	// met:
				9	//
				10	// * Redistributions of source code must retain the above copyright
				11	// notice, this list of conditions and the following disclaimer.
				12	// * Redistributions in binary form must reproduce the above
				13	// copyright notice, this list of conditions and the following disclaimer
				14	// in the documentation and/or other materials provided with the
				15	// distribution.
				16	// * Neither the name of Google Inc. nor the names of its
				17	// contributors may be used to endorse or promote products derived from
				18	// this software without specific prior written permission.
				19	//
				20	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				21	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				22	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				23	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				24	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				25	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				26	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				27	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				28	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				29	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				30	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				31
				32	package proto
				33
				34	// Functions for parsing the Text protocol buffer format.
				35	// TODO:
				36	// - groups.
				37
				38	import (
				39	"fmt"
				40	"os"
				41	"reflect"
				42	"strconv"
				43	)
				44
				45	// ParseError satisfies the os.Error interface.
				46	type ParseError struct {
				47	Message string
				48	Line int // 1-based line number
				49	Offset int // 0-based byte offset from start of input
				50	}
				51
				52	func (p *ParseError) String() string {
				53	if p.Line == 1 {
				54	// show offset only for first line
				55	return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
				56	}
				57	return fmt.Sprintf("line %d: %v", p.Line, p.Message)
				58	}
				59
				60	type token struct {
				61	value string
				62	err *ParseError
				63	line int // line number
				64	offset int // byte number from start of input, not start of line
				65	unquoted string // the unquoted version of value, if it was a quoted string
				66	}
				67
				68	func (t *token) String() string {
				69	if t.err == nil {
				70	return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
				71	}
				72	return fmt.Sprintf("parse error: %v", t.err)
				73	}
				74
				75	type textParser struct {
				76	s string // remaining input
				77	done bool // whether the parsing is finished (success or error)
				78	backed bool // whether back() was called
				79	offset, line int
				80	cur token
				81	}
				82
				83	func newTextParser(s string) *textParser {
				84	p := new(textParser)
				85	p.s = s
				86	p.line = 1
				87	p.cur.line = 1
				88	return p
				89	}
				90
				91	func (p textParser) error(format string, a ...interface{}) ParseError {
				92	pe := &ParseError{fmt.Sprintf(format, a), p.cur.line, p.cur.offset}
				93	p.cur.err = pe
				94	p.done = true
				95	return pe
				96	}
				97
				98	// Numbers and identifiers are matched by [-+._A-Za-z0-9]
				99	func isIdentOrNumberChar(c byte) bool {
				100	switch {
				101	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
				102	return true
				103	case '0' <= c && c <= '9':
				104	return true
				105	}
				106	switch c {
				107	case '-', '+', '.', '_':
				108	return true
				109	}
				110	return false
				111	}
				112
				113	func isWhitespace(c byte) bool {
				114	switch c {
				115	case ' ', '\t', '\n', '\r':
				116	return true
				117	}
				118	return false
				119	}
				120
				121	func (p *textParser) skipWhitespace() {
				122	i := 0
				123	for i < len(p.s) && (isWhitespace(p.s[i]) \|\| p.s[i] == '#') {
				124	if p.s[i] == '#' {
				125	// comment; skip to end of line or input
				126	for i < len(p.s) && p.s[i] != '\n' {
				127	i++
				128	}
				129	if i == len(p.s) {
				130	break
				131	}
				132	}
				133	if p.s[i] == '\n' {
				134	p.line++
				135	}
				136	i++
				137	}
				138	p.offset += i
				139	p.s = p.s[i:len(p.s)]
				140	if len(p.s) == 0 {
				141	p.done = true
				142	}
				143	}
				144
				145	func (p *textParser) advance() {
				146	// Skip whitespace
				147	p.skipWhitespace()
				148	if p.done {
				149	return
				150	}
				151
				152	// Start of non-whitespace
				153	p.cur.err = nil
				154	p.cur.offset, p.cur.line = p.offset, p.line
				155	p.cur.unquoted = ""
				156	switch p.s[0] {
				157	case '<', '>', '{', '}', ':':
				158	// Single symbol
				159	p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
				160	case '"':
				161	// Quoted string
				162	i := 1
				163	for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
				164	if p.s[i] == '\\' && i+1 < len(p.s) {
				165	// skip escaped char
				166	i++
				167	}
				168	i++
				169	}
				170	if i >= len(p.s) \|\| p.s[i] != '"' {
				171	p.error("unmatched quote")
				172	return
				173	}
				174	// TODO: Should be UnquoteC.
				175	unq, err := strconv.Unquote(p.s[0 : i+1])
				176	if err != nil {
				177	p.error("invalid quoted string %v", p.s[0:i+1])
				178	return
				179	}
				180	p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
				181	p.cur.unquoted = unq
				182	default:
				183	i := 0
				184	for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
				185	i++
				186	}
				187	if i == 0 {
				188	p.error("unexpected byte %#x", p.s[0])
				189	return
				190	}
				191	p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
				192	}
				193	p.offset += len(p.cur.value)
				194	}
				195
				196	// Back off the parser by one token. Can only be done between calls to next().
				197	// It makes the next advance() a no-op.
				198	func (p *textParser) back() { p.backed = true }
				199
				200	// Advances the parser and returns the new current token.
				201	func (p textParser) next() token {
				202	if p.backed \|\| p.done {
				203	p.backed = false
				204	return &p.cur
				205	}
				206	p.advance()
				207	if p.done {
				208	p.cur.value = ""
				209	} else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
				210	// Look for multiple quoted strings separated by whitespace,
				211	// and concatenate them.
				212	cat := p.cur
				213	for {
				214	p.skipWhitespace()
				215	if p.done \|\| p.s[0] != '"' {
				216	break
				217	}
				218	p.advance()
				219	if p.cur.err != nil {
				220	return &p.cur
				221	}
				222	cat.value += " " + p.cur.value
				223	cat.unquoted += p.cur.unquoted
				224	}
				225	p.done = false // parser may have seen EOF, but we want to return cat
				226	p.cur = cat
				227	}
				228	return &p.cur
				229	}
				230
				231	type nillable interface {
				232	IsNil() bool
				233	}
				234
				235	// Return an error indicating which required field was not set.
				236	func (p textParser) missingRequiredFieldError(sv reflect.StructValue) *ParseError {
				237	st := sv.Type().(*reflect.StructType)
				238	sprops := GetProperties(st)
				239	for i := 0; i < st.NumField(); i++ {
				240	// All protocol buffer fields are nillable, but let's be careful.
				241	nfv, ok := sv.Field(i).(nillable)
				242	if !ok \|\| !nfv.IsNil() {
				243	continue
				244	}
				245
				246	props := sprops.Prop[i]
				247	if props.Required {
				248	return p.error("message %v missing required field %q", st, props.OrigName)
				249	}
				250	}
				251	return p.error("message %v missing required field", st) // should not happen
				252	}
				253
				254	// Returns the index in the struct for the named field, as well as the parsed tag properties.
				255	func structFieldByName(st reflect.StructType, name string) (int, Properties, bool) {
				256	sprops := GetProperties(st)
				257	for i := 0; i < st.NumField(); i++ {
				258	props := sprops.Prop[i]
				259	if props.OrigName == name {
				260	return i, props, true
				261	}
				262	}
				263	return -1, nil, false
				264	}
				265
				266	func (p textParser) readStruct(sv reflect.StructValue, terminator string) *ParseError {
				267	st := sv.Type().(*reflect.StructType)
				268	reqCount := GetProperties(st).reqCount
				269	// A struct is a sequence of "name: value", terminated by one of
				270	// '>' or '}', or the end of the input.
				271	for {
				272	tok := p.next()
				273	if tok.err != nil {
				274	return tok.err
				275	}
				276	if tok.value == terminator {
				277	break
				278	}
				279
				280	fi, props, ok := structFieldByName(st, tok.value)
				281	if !ok {
				282	return p.error("unknown field name %q in %v", tok.value, st)
				283	}
				284
				285	// Check that it's not already set if it's not a repeated field.
				286	if !props.Repeated {
				287	if nfv, ok := sv.Field(fi).(nillable); ok && !nfv.IsNil() {
				288	return p.error("non-repeated field %q was repeated", tok.value)
				289	}
				290	}
				291
				292	tok = p.next()
				293	if tok.err != nil {
				294	return tok.err
				295	}
				296	if tok.value != ":" {
				297	// Colon is optional when the field is a group or message.
				298	needColon := true
				299	switch props.Wire {
				300	case "group":
				301	needColon = false
				302	case "bytes":
				303	// A "bytes" field is either a message, a string, or a repeated field;
				304	// those three become T, string and []T respectively, so we can check for
				305	// this field being a pointer to a non-string.
				306	typ := st.Field(fi).Type
				307	pt, ok := typ.(*reflect.PtrType)
				308	if !ok {
				309	break
				310	}
				311	_, ok = pt.Elem().(*reflect.StringType)
				312	if ok {
				313	break
				314	}
				315	needColon = false
				316	}
				317	if needColon {
				318	return p.error("expected ':', found %q", tok.value)
				319	}
				320	p.back()
				321	}
				322
				323	// Parse into the field.
				324	if err := p.readAny(sv.Field(fi), props); err != nil {
				325	return err
				326	}
				327
				328	if props.Required {
				329	reqCount--
				330	}
				331	}
				332
				333	if reqCount > 0 {
				334	return p.missingRequiredFieldError(sv)
				335	}
				336	return nil
				337	}
				338
				339	const (
				340	minInt32 = -1 << 31
				341	maxInt32 = 1<<31 - 1
				342	maxUint32 = 1<<32 - 1
				343	)
				344
				345	func (p textParser) readAny(v reflect.Value, props Properties) *ParseError {
				346	tok := p.next()
				347	if tok.err != nil {
				348	return tok.err
				349	}
				350	if tok.value == "" {
				351	return p.error("unexpected EOF")
				352	}
				353
				354	switch fv := v.(type) {
				355	case *reflect.SliceValue:
				356	at := v.Type().(*reflect.SliceType)
				357	if _, ok := at.Elem().(*reflect.Uint8Type); ok {
				358	// Special case for []byte
				359	if tok.value[0] != '"' {
				360	// Deliberately written out here, as the error after
				361	// this switch statement would write "invalid []byte: ...",
				362	// which is not as user-friendly.
				363	return p.error("invalid string: %v", tok.value)
				364	}
				365	bytes := []byte(tok.unquoted)
				366	fv.Set(reflect.NewValue(bytes).(*reflect.SliceValue))
				367	return nil
				368	}
				369	// Repeated field. May already exist.
				370	cnt := fv.Len()
				371	nav := reflect.MakeSlice(at, cnt, cnt+1)
				372	reflect.ArrayCopy(nav, fv)
				373	fv.Set(nav)
				374	fv.SetLen(cnt + 1)
				375
				376	// Read one.
				377	p.back()
				378	return p.readAny(fv.Elem(cnt), nil) // TODO: pass properties?
				379	case *reflect.BoolValue:
				380	// Either "true", "false", 1 or 0.
				381	switch tok.value {
				382	case "true", "1":
				383	fv.Set(true)
				384	return nil
				385	case "false", "0":
				386	fv.Set(false)
				387	return nil
				388	}
				389	case *reflect.Float32Value:
				390	if f, err := strconv.Atof32(tok.value); err == nil {
				391	fv.Set(f)
				392	return nil
				393	}
				394	case *reflect.Float64Value:
				395	if f, err := strconv.Atof64(tok.value); err == nil {
				396	fv.Set(f)
				397	return nil
				398	}
				399	case *reflect.Int32Value:
				400	if x, err := strconv.Atoi64(tok.value); err == nil && minInt32 <= x && x <= maxInt32 {
				401	fv.Set(int32(x))
				402	return nil
				403	}
				404	if len(props.Enum) == 0 {
				405	break
				406	}
				407	m, ok := enumValueMaps[props.Enum]
				408	if !ok {
				409	break
				410	}
				411	x, ok := m[tok.value]
				412	if !ok {
				413	break
				414	}
				415	fv.Set(x)
				416	return nil
				417	case *reflect.Int64Value:
				418	if x, err := strconv.Atoi64(tok.value); err == nil {
				419	fv.Set(x)
				420	return nil
				421	}
				422	case *reflect.PtrValue:
				423	// A basic field (indirected through pointer), or a repeated message/group
				424	p.back()
				425	fv.PointTo(reflect.MakeZero(fv.Type().(*reflect.PtrType).Elem()))
				426	return p.readAny(fv.Elem(), props)
				427	case *reflect.StringValue:
				428	if tok.value[0] == '"' {
				429	fv.Set(tok.unquoted)
				430	return nil
				431	}
				432	case *reflect.StructValue:
				433	var terminator string
				434	switch tok.value {
				435	case "{":
				436	terminator = "}"
				437	case "<":
				438	terminator = ">"
				439	default:
				440	return p.error("expected '{' or '<', found %q", tok.value)
				441	}
				442	return p.readStruct(fv, terminator)
				443	case *reflect.Uint32Value:
				444	if x, err := strconv.Atoui64(tok.value); err == nil && x <= maxUint32 {
				445	fv.Set(uint32(x))
				446	return nil
				447	}
				448	case *reflect.Uint64Value:
				449	if x, err := strconv.Atoui64(tok.value); err == nil {
				450	fv.Set(x)
				451	return nil
				452	}
				453	}
				454	return p.error("invalid %v: %v", v.Type(), tok.value)
				455	}
				456
				457	var notPtrStruct os.Error = &ParseError{"destination is not a pointer to a struct", 0, 0}
				458
				459	// UnmarshalText reads a protobuffer in Text format.
				460	func UnmarshalText(s string, pb interface{}) os.Error {
				461	pv, ok := reflect.NewValue(pb).(*reflect.PtrValue)
				462	if !ok {
				463	return notPtrStruct
				464	}
				465	sv, ok := pv.Elem().(*reflect.StructValue)
				466	if !ok {
				467	return notPtrStruct
				468	}
				469	if pe := newTextParser(s).readStruct(sv, ""); pe != nil {
				470	return pe
				471	}
				472	return nil
				473	}